From 266d63a7d9d48c6d5dee486378ec0e8c86c4d74a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 11 Feb 2019 12:23:51 +0100 Subject: x86/cpufeature: Fix various quality problems in the header Thomas noticed that the new arch/x86/include/asm/cpu_device_id.h header is a train-wreck that didn't incorporate review feedback like not using __u8 in kernel-only headers. While at it also fix all the *other* problems this header has: - Use canonical names for the header guards. It's inexplicable why a non-standard guard was used. - Don't define the header guard to 1. Plus annotate the closing #endif as done absolutely every other header. Again, an inexplicable source of noise. - Move the kernel API calls provided by this header next to each other, there's absolutely no reason to have them spread apart in the header. - Align the INTEL_CPU_DESC() macro initializations vertically, this is easier to read and it's also the canonical style. - Actually name the macro arguments properly: instead of 'mod, step, rev', spell out 'model, stepping, revision' - it's not like we have a lack of characters in this header. - Actually make arguments macro-safe - again it's inexplicable why it wasn't done properly to begin with. Quite amazing how many problems a 41 lines header can contain. This kind of code quality is unacceptable, and it slipped through the review net of 2 developers and 2 maintainers, including myself, until Thomas noticed it. :-/ Reported-by: Thomas Gleixner Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpu_device_id.h | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h index 3417110574c1..31c379c1da41 100644 --- a/arch/x86/include/asm/cpu_device_id.h +++ b/arch/x86/include/asm/cpu_device_id.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _CPU_DEVICE_ID -#define _CPU_DEVICE_ID 1 +#ifndef _ASM_X86_CPU_DEVICE_ID +#define _ASM_X86_CPU_DEVICE_ID /* * Declare drivers belonging to specific x86 CPUs @@ -9,8 +9,6 @@ #include -extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match); - /* * Match specific microcode revisions. * @@ -22,21 +20,22 @@ extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match); */ struct x86_cpu_desc { - __u8 x86_family; - __u8 x86_vendor; - __u8 x86_model; - __u8 x86_stepping; - __u32 x86_microcode_rev; + u8 x86_family; + u8 x86_vendor; + u8 x86_model; + u8 x86_stepping; + u32 x86_microcode_rev; }; -#define INTEL_CPU_DESC(mod, step, rev) { \ - .x86_family = 6, \ - .x86_vendor = X86_VENDOR_INTEL, \ - .x86_model = mod, \ - .x86_stepping = step, \ - .x86_microcode_rev = rev, \ +#define INTEL_CPU_DESC(model, stepping, revision) { \ + .x86_family = 6, \ + .x86_vendor = X86_VENDOR_INTEL, \ + .x86_model = (model), \ + .x86_stepping = (stepping), \ + .x86_microcode_rev = (revision), \ } +extern const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match); extern bool x86_cpu_has_min_microcode_rev(const struct x86_cpu_desc *table); -#endif +#endif /* _ASM_X86_CPU_DEVICE_ID */ -- cgit From 13bcb80b7ee79431fce361e060611134cb19e209 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Wed, 20 Feb 2019 16:25:04 +0800 Subject: drm/i915/gvt: Fix MI_FLUSH_DW parsing with correct index check When MI_FLUSH_DW post write hw status page in index mode, the index value is in dword step and turned into address offset in cmd dword1. As status page size is 4K, so can't exceed that. This fixed upper bound check in cmd parser code which incorrectly stopped VM for reason of invalid MI_FLUSH_DW write index. v2: - Fix upper bound as 4K page size because index value is address offset. Fixes: be1da7070aea ("drm/i915/gvt: vGPU command scanner") Cc: stable@vger.kernel.org # v4.10+ Cc: "Zhao, Yan Y" Reviewed-by: Yan Zhao Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/cmd_parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 77ae634eb11c..bd95fd6b4ac8 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -1446,7 +1446,7 @@ static inline int cmd_address_audit(struct parser_exec_state *s, } if (index_mode) { - if (guest_gma >= I915_GTT_PAGE_SIZE / sizeof(u64)) { + if (guest_gma >= I915_GTT_PAGE_SIZE) { ret = -EFAULT; goto err; } -- cgit From 1e8b15a1988ed3c7429402017d589422628cdf47 Mon Sep 17 00:00:00 2001 From: Colin Xu Date: Fri, 22 Feb 2019 14:13:42 +0800 Subject: drm/i915/gvt: Add in context mmio 0x20D8 to gen9 mmio list Depends on GEN family and I915_PARAM_HAS_CONTEXT_ISOLATION, Mesa driver will decide whether constant buffer 0 address is relative or absolute, and load GPU initial state by lri to context mmio INSTPM (GEN8) or 0x20D8 (>=GEN9). Mesa Commit fa8a764b62 ("i965: Use absolute addressing for constant buffer 0 on Kernel 4.16+.") INSTPM is already added to gen8_engine_mmio_list, but 0x20D8 is missed in gen9_engine_mmio_list. From GVT point of view, different guest could have different context so should switch those mmio accordingly. v2: Update fixes commit ID. Fixes: 178657139307 ("drm/i915/gvt: vGPU context switch") Reviewed-by: Zhenyu Wang Signed-off-by: Colin Xu Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/mmio_context.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index d6e02c15ef97..38492450552a 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -132,6 +132,7 @@ static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = { {RCS, GEN9_GAMT_ECO_REG_RW_IA, 0x0, false}, /* 0x4ab0 */ {RCS, GEN9_CSFE_CHICKEN1_RCS, 0xffff, false}, /* 0x20d4 */ + {RCS, _MMIO(0x20D8), 0xffff, true}, /* 0x20d8 */ {RCS, GEN8_GARBCNTL, 0x0, false}, /* 0xb004 */ {RCS, GEN7_FF_THREAD_MODE, 0x0, false}, /* 0x20a0 */ -- cgit From e20119f7eaaaf6aad5b44f35155ce500429e17f6 Mon Sep 17 00:00:00 2001 From: Takeshi Kihara Date: Thu, 21 Feb 2019 13:59:38 +0100 Subject: arm64: dts: renesas: r8a77990: Fix SCIF5 DMA channels According to the R-Car Gen3 Hardware Manual Errata for Rev 1.50 of Feb 12, 2019, the DMA channels for SCIF5 are corrected from 16..47 to 0..15 on R-Car E3. Signed-off-by: Takeshi Kihara Fixes: a5ebe5e49a862e21 ("arm64: dts: renesas: r8a77990: Add SCIF-{0,1,3,4,5} device nodes") Signed-off-by: Geert Uytterhoeven Reviewed-by: Fabrizio Castro Signed-off-by: Simon Horman --- arch/arm64/boot/dts/renesas/r8a77990.dtsi | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/r8a77990.dtsi b/arch/arm64/boot/dts/renesas/r8a77990.dtsi index 786178cf1ffd..36f409091cfc 100644 --- a/arch/arm64/boot/dts/renesas/r8a77990.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a77990.dtsi @@ -2,7 +2,7 @@ /* * Device Tree Source for the R-Car E3 (R8A77990) SoC * - * Copyright (C) 2018 Renesas Electronics Corp. + * Copyright (C) 2018-2019 Renesas Electronics Corp. */ #include @@ -1042,9 +1042,8 @@ <&cpg CPG_CORE R8A77990_CLK_S3D1C>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; - dmas = <&dmac1 0x5b>, <&dmac1 0x5a>, - <&dmac2 0x5b>, <&dmac2 0x5a>; - dma-names = "tx", "rx", "tx", "rx"; + dmas = <&dmac0 0x5b>, <&dmac0 0x5a>; + dma-names = "tx", "rx"; power-domains = <&sysc R8A77990_PD_ALWAYS_ON>; resets = <&cpg 202>; status = "disabled"; -- cgit From c21cd4ae82e169b394139243684aaacf31bfcdf8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 21 Feb 2019 15:04:28 +0100 Subject: arm64: dts: renesas: r8a774c0: Fix SCIF5 DMA channels Correct the DMA channels for SCIF5 from 16..47 to 0..15, as was done for R-Car E3. Signed-off-by: Takeshi Kihara Fixes: 2660a6af690ebbb4 ("arm64: dts: renesas: r8a774c0: Add SCIF and HSCIF nodes") Signed-off-by: Geert Uytterhoeven Reviewed-by: Fabrizio Castro Signed-off-by: Simon Horman --- arch/arm64/boot/dts/renesas/r8a774c0.dtsi | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi index f2e390f7f1d5..6590b63268b0 100644 --- a/arch/arm64/boot/dts/renesas/r8a774c0.dtsi +++ b/arch/arm64/boot/dts/renesas/r8a774c0.dtsi @@ -2,7 +2,7 @@ /* * Device Tree Source for the RZ/G2E (R8A774C0) SoC * - * Copyright (C) 2018 Renesas Electronics Corp. + * Copyright (C) 2018-2019 Renesas Electronics Corp. */ #include @@ -990,9 +990,8 @@ <&cpg CPG_CORE R8A774C0_CLK_S3D1C>, <&scif_clk>; clock-names = "fck", "brg_int", "scif_clk"; - dmas = <&dmac1 0x5b>, <&dmac1 0x5a>, - <&dmac2 0x5b>, <&dmac2 0x5a>; - dma-names = "tx", "rx", "tx", "rx"; + dmas = <&dmac0 0x5b>, <&dmac0 0x5a>; + dma-names = "tx", "rx"; power-domains = <&sysc R8A774C0_PD_ALWAYS_ON>; resets = <&cpg 202>; status = "disabled"; -- cgit From ef4c54c340de47bf167b326f7560c1cc3751d154 Mon Sep 17 00:00:00 2001 From: Alexey Brodkin Date: Thu, 24 Jan 2019 15:17:03 +0300 Subject: ARC: DTB: [scripted] fix node name and address spelling 1. Remove "0x" prefix from unit-address of node names ----------------------->8------------------------ sed -i 's/@0x/@/g' arch/arc/boot/dts/*.dts* ----------------------->8------------------------ 2. Make all hex addresses lowercase: ----------------------->8------------------------ sed -i 's/@\([0-9A-Za-z]*\)/@\L\1/g' arch/arc/boot/dts/*.dts* sed -i 's/0x\([0-9A-Za-z]*\)/0x\L\1/g' arch/arc/boot/dts/*.dts* ----------------------->8------------------------ Inspired by [1] and the like. [1] http://kisskb.ellerman.id.au/kisskb/buildresult/13612017/ Reviewed-by: Rob Herring Signed-off-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/abilis_tb100.dtsi | 58 ++++++++++++++++---------------- arch/arc/boot/dts/abilis_tb100_dvk.dts | 14 ++++---- arch/arc/boot/dts/abilis_tb101.dtsi | 58 ++++++++++++++++---------------- arch/arc/boot/dts/abilis_tb101_dvk.dts | 14 ++++---- arch/arc/boot/dts/abilis_tb10x.dtsi | 60 +++++++++++++++++----------------- arch/arc/boot/dts/axc001.dtsi | 6 ++-- arch/arc/boot/dts/axc003.dtsi | 16 ++++----- arch/arc/boot/dts/axc003_idu.dtsi | 16 ++++----- arch/arc/boot/dts/axs10x_mb.dtsi | 22 ++++++------- arch/arc/boot/dts/hsdk.dts | 4 +-- arch/arc/boot/dts/vdk_axc003.dtsi | 4 +-- arch/arc/boot/dts/vdk_axc003_idu.dtsi | 4 +-- arch/arc/boot/dts/vdk_axs10x_mb.dtsi | 18 +++++----- 13 files changed, 147 insertions(+), 147 deletions(-) diff --git a/arch/arc/boot/dts/abilis_tb100.dtsi b/arch/arc/boot/dts/abilis_tb100.dtsi index 02410b211433..c0bcd97522bb 100644 --- a/arch/arc/boot/dts/abilis_tb100.dtsi +++ b/arch/arc/boot/dts/abilis_tb100.dtsi @@ -38,7 +38,7 @@ clock-div = <6>; }; - iomux: iomux@FF10601c { + iomux: iomux@ff10601c { /* Port 1 */ pctl_tsin_s0: pctl-tsin-s0 { /* Serial TS-in 0 */ abilis,function = "mis0"; @@ -162,182 +162,182 @@ }; }; - gpioa: gpio@FF140000 { + gpioa: gpio@ff140000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF140000 0x1000>; + reg = <0xff140000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <3>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpioa"; }; - gpiob: gpio@FF141000 { + gpiob: gpio@ff141000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF141000 0x1000>; + reg = <0xff141000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <2>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpiob"; }; - gpioc: gpio@FF142000 { + gpioc: gpio@ff142000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF142000 0x1000>; + reg = <0xff142000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <3>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpioc"; }; - gpiod: gpio@FF143000 { + gpiod: gpio@ff143000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF143000 0x1000>; + reg = <0xff143000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <2>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpiod"; }; - gpioe: gpio@FF144000 { + gpioe: gpio@ff144000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF144000 0x1000>; + reg = <0xff144000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <3>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpioe"; }; - gpiof: gpio@FF145000 { + gpiof: gpio@ff145000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF145000 0x1000>; + reg = <0xff145000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <2>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpiof"; }; - gpiog: gpio@FF146000 { + gpiog: gpio@ff146000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF146000 0x1000>; + reg = <0xff146000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <3>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpiog"; }; - gpioh: gpio@FF147000 { + gpioh: gpio@ff147000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF147000 0x1000>; + reg = <0xff147000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <2>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpioh"; }; - gpioi: gpio@FF148000 { + gpioi: gpio@ff148000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF148000 0x1000>; + reg = <0xff148000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <12>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpioi"; }; - gpioj: gpio@FF149000 { + gpioj: gpio@ff149000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF149000 0x1000>; + reg = <0xff149000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <32>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpioj"; }; - gpiok: gpio@FF14a000 { + gpiok: gpio@ff14a000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF14A000 0x1000>; + reg = <0xff14a000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <22>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpiok"; }; - gpiol: gpio@FF14b000 { + gpiol: gpio@ff14b000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF14B000 0x1000>; + reg = <0xff14b000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <4>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpiol"; }; - gpiom: gpio@FF14c000 { + gpiom: gpio@ff14c000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF14C000 0x1000>; + reg = <0xff14c000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <4>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpiom"; }; - gpion: gpio@FF14d000 { + gpion: gpio@ff14d000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF14D000 0x1000>; + reg = <0xff14d000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <5>; diff --git a/arch/arc/boot/dts/abilis_tb100_dvk.dts b/arch/arc/boot/dts/abilis_tb100_dvk.dts index 3acf04db8030..c968e677db46 100644 --- a/arch/arc/boot/dts/abilis_tb100_dvk.dts +++ b/arch/arc/boot/dts/abilis_tb100_dvk.dts @@ -37,27 +37,27 @@ }; soc100 { - uart@FF100000 { + uart@ff100000 { pinctrl-names = "default"; pinctrl-0 = <&pctl_uart0>; }; - ethernet@FE100000 { + ethernet@fe100000 { phy-mode = "rgmii"; }; - i2c0: i2c@FF120000 { + i2c0: i2c@ff120000 { i2c-sda-hold-time-ns = <432>; }; - i2c1: i2c@FF121000 { + i2c1: i2c@ff121000 { i2c-sda-hold-time-ns = <432>; }; - i2c2: i2c@FF122000 { + i2c2: i2c@ff122000 { i2c-sda-hold-time-ns = <432>; }; - i2c3: i2c@FF123000 { + i2c3: i2c@ff123000 { i2c-sda-hold-time-ns = <432>; }; - i2c4: i2c@FF124000 { + i2c4: i2c@ff124000 { i2c-sda-hold-time-ns = <432>; }; diff --git a/arch/arc/boot/dts/abilis_tb101.dtsi b/arch/arc/boot/dts/abilis_tb101.dtsi index f9e7686044eb..6a1615f58f05 100644 --- a/arch/arc/boot/dts/abilis_tb101.dtsi +++ b/arch/arc/boot/dts/abilis_tb101.dtsi @@ -38,7 +38,7 @@ clock-div = <6>; }; - iomux: iomux@FF10601c { + iomux: iomux@ff10601c { /* Port 1 */ pctl_tsin_s0: pctl-tsin-s0 { /* Serial TS-in 0 */ abilis,function = "mis0"; @@ -171,182 +171,182 @@ }; }; - gpioa: gpio@FF140000 { + gpioa: gpio@ff140000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF140000 0x1000>; + reg = <0xff140000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <3>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpioa"; }; - gpiob: gpio@FF141000 { + gpiob: gpio@ff141000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF141000 0x1000>; + reg = <0xff141000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <2>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpiob"; }; - gpioc: gpio@FF142000 { + gpioc: gpio@ff142000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF142000 0x1000>; + reg = <0xff142000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <3>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpioc"; }; - gpiod: gpio@FF143000 { + gpiod: gpio@ff143000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF143000 0x1000>; + reg = <0xff143000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <2>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpiod"; }; - gpioe: gpio@FF144000 { + gpioe: gpio@ff144000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF144000 0x1000>; + reg = <0xff144000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <3>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpioe"; }; - gpiof: gpio@FF145000 { + gpiof: gpio@ff145000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF145000 0x1000>; + reg = <0xff145000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <2>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpiof"; }; - gpiog: gpio@FF146000 { + gpiog: gpio@ff146000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF146000 0x1000>; + reg = <0xff146000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <3>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpiog"; }; - gpioh: gpio@FF147000 { + gpioh: gpio@ff147000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF147000 0x1000>; + reg = <0xff147000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <2>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpioh"; }; - gpioi: gpio@FF148000 { + gpioi: gpio@ff148000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF148000 0x1000>; + reg = <0xff148000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <12>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpioi"; }; - gpioj: gpio@FF149000 { + gpioj: gpio@ff149000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF149000 0x1000>; + reg = <0xff149000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <32>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpioj"; }; - gpiok: gpio@FF14a000 { + gpiok: gpio@ff14a000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF14A000 0x1000>; + reg = <0xff14a000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <22>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpiok"; }; - gpiol: gpio@FF14b000 { + gpiol: gpio@ff14b000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF14B000 0x1000>; + reg = <0xff14b000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <4>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpiol"; }; - gpiom: gpio@FF14c000 { + gpiom: gpio@ff14c000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF14C000 0x1000>; + reg = <0xff14c000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <4>; gpio-ranges = <&iomux 0 0 0>; gpio-ranges-group-names = "gpiom"; }; - gpion: gpio@FF14d000 { + gpion: gpio@ff14d000 { compatible = "abilis,tb10x-gpio"; interrupt-controller; #interrupt-cells = <1>; interrupt-parent = <&tb10x_ictl>; interrupts = <27 2>; - reg = <0xFF14D000 0x1000>; + reg = <0xff14d000 0x1000>; gpio-controller; #gpio-cells = <2>; abilis,ngpio = <5>; diff --git a/arch/arc/boot/dts/abilis_tb101_dvk.dts b/arch/arc/boot/dts/abilis_tb101_dvk.dts index 37d88c5dd181..05143ce9c120 100644 --- a/arch/arc/boot/dts/abilis_tb101_dvk.dts +++ b/arch/arc/boot/dts/abilis_tb101_dvk.dts @@ -37,27 +37,27 @@ }; soc100 { - uart@FF100000 { + uart@ff100000 { pinctrl-names = "default"; pinctrl-0 = <&pctl_uart0>; }; - ethernet@FE100000 { + ethernet@fe100000 { phy-mode = "rgmii"; }; - i2c0: i2c@FF120000 { + i2c0: i2c@ff120000 { i2c-sda-hold-time-ns = <432>; }; - i2c1: i2c@FF121000 { + i2c1: i2c@ff121000 { i2c-sda-hold-time-ns = <432>; }; - i2c2: i2c@FF122000 { + i2c2: i2c@ff122000 { i2c-sda-hold-time-ns = <432>; }; - i2c3: i2c@FF123000 { + i2c3: i2c@ff123000 { i2c-sda-hold-time-ns = <432>; }; - i2c4: i2c@FF124000 { + i2c4: i2c@ff124000 { i2c-sda-hold-time-ns = <432>; }; diff --git a/arch/arc/boot/dts/abilis_tb10x.dtsi b/arch/arc/boot/dts/abilis_tb10x.dtsi index 3121536b25a3..2fbf1bdfe6de 100644 --- a/arch/arc/boot/dts/abilis_tb10x.dtsi +++ b/arch/arc/boot/dts/abilis_tb10x.dtsi @@ -54,7 +54,7 @@ #size-cells = <1>; device_type = "soc"; ranges = <0xfe000000 0xfe000000 0x02000000 - 0x000F0000 0x000F0000 0x00010000>; + 0x000f0000 0x000f0000 0x00010000>; compatible = "abilis,tb10x", "simple-bus"; pll0: oscillator { @@ -75,10 +75,10 @@ clock-output-names = "ahb_clk"; }; - iomux: iomux@FF10601c { + iomux: iomux@ff10601c { compatible = "abilis,tb10x-iomux"; #gpio-range-cells = <3>; - reg = <0xFF10601c 0x4>; + reg = <0xff10601c 0x4>; }; intc: interrupt-controller { @@ -88,7 +88,7 @@ }; tb10x_ictl: pic@fe002000 { compatible = "abilis,tb10x-ictl"; - reg = <0xFE002000 0x20>; + reg = <0xfe002000 0x20>; interrupt-controller; #interrupt-cells = <2>; interrupt-parent = <&intc>; @@ -96,27 +96,27 @@ 20 21 22 23 24 25 26 27 28 29 30 31>; }; - uart@FF100000 { + uart@ff100000 { compatible = "snps,dw-apb-uart"; - reg = <0xFF100000 0x100>; + reg = <0xff100000 0x100>; clock-frequency = <166666666>; interrupts = <25 8>; reg-shift = <2>; reg-io-width = <4>; interrupt-parent = <&tb10x_ictl>; }; - ethernet@FE100000 { + ethernet@fe100000 { compatible = "snps,dwmac-3.70a","snps,dwmac"; - reg = <0xFE100000 0x1058>; + reg = <0xfe100000 0x1058>; interrupt-parent = <&tb10x_ictl>; interrupts = <6 8>; interrupt-names = "macirq"; clocks = <&ahb_clk>; clock-names = "stmmaceth"; }; - dma@FE000000 { + dma@fe000000 { compatible = "snps,dma-spear1340"; - reg = <0xFE000000 0x400>; + reg = <0xfe000000 0x400>; interrupt-parent = <&tb10x_ictl>; interrupts = <14 8>; dma-channels = <6>; @@ -132,70 +132,70 @@ multi-block = <1 1 1 1 1 1>; }; - i2c0: i2c@FF120000 { + i2c0: i2c@ff120000 { #address-cells = <1>; #size-cells = <0>; compatible = "snps,designware-i2c"; - reg = <0xFF120000 0x1000>; + reg = <0xff120000 0x1000>; interrupt-parent = <&tb10x_ictl>; interrupts = <12 8>; clocks = <&ahb_clk>; }; - i2c1: i2c@FF121000 { + i2c1: i2c@ff121000 { #address-cells = <1>; #size-cells = <0>; compatible = "snps,designware-i2c"; - reg = <0xFF121000 0x1000>; + reg = <0xff121000 0x1000>; interrupt-parent = <&tb10x_ictl>; interrupts = <12 8>; clocks = <&ahb_clk>; }; - i2c2: i2c@FF122000 { + i2c2: i2c@ff122000 { #address-cells = <1>; #size-cells = <0>; compatible = "snps,designware-i2c"; - reg = <0xFF122000 0x1000>; + reg = <0xff122000 0x1000>; interrupt-parent = <&tb10x_ictl>; interrupts = <12 8>; clocks = <&ahb_clk>; }; - i2c3: i2c@FF123000 { + i2c3: i2c@ff123000 { #address-cells = <1>; #size-cells = <0>; compatible = "snps,designware-i2c"; - reg = <0xFF123000 0x1000>; + reg = <0xff123000 0x1000>; interrupt-parent = <&tb10x_ictl>; interrupts = <12 8>; clocks = <&ahb_clk>; }; - i2c4: i2c@FF124000 { + i2c4: i2c@ff124000 { #address-cells = <1>; #size-cells = <0>; compatible = "snps,designware-i2c"; - reg = <0xFF124000 0x1000>; + reg = <0xff124000 0x1000>; interrupt-parent = <&tb10x_ictl>; interrupts = <12 8>; clocks = <&ahb_clk>; }; - spi0: spi@0xFE010000 { + spi0: spi@fe010000 { #address-cells = <1>; #size-cells = <0>; cell-index = <0>; compatible = "abilis,tb100-spi"; num-cs = <1>; - reg = <0xFE010000 0x20>; + reg = <0xfe010000 0x20>; interrupt-parent = <&tb10x_ictl>; interrupts = <26 8>; clocks = <&ahb_clk>; }; - spi1: spi@0xFE011000 { + spi1: spi@fe011000 { #address-cells = <1>; #size-cells = <0>; cell-index = <1>; compatible = "abilis,tb100-spi"; num-cs = <2>; - reg = <0xFE011000 0x20>; + reg = <0xfe011000 0x20>; interrupt-parent = <&tb10x_ictl>; interrupts = <10 8>; clocks = <&ahb_clk>; @@ -226,23 +226,23 @@ interrupts = <20 2>, <19 2>; interrupt-names = "cmd_irq", "event_irq"; }; - tb10x_mdsc0: tb10x-mdscr@FF300000 { + tb10x_mdsc0: tb10x-mdscr@ff300000 { compatible = "abilis,tb100-mdscr"; - reg = <0xFF300000 0x7000>; + reg = <0xff300000 0x7000>; tb100-mdscr-manage-tsin; }; - tb10x_mscr0: tb10x-mdscr@FF307000 { + tb10x_mscr0: tb10x-mdscr@ff307000 { compatible = "abilis,tb100-mdscr"; - reg = <0xFF307000 0x7000>; + reg = <0xff307000 0x7000>; }; tb10x_scr0: tb10x-mdscr@ff30e000 { compatible = "abilis,tb100-mdscr"; - reg = <0xFF30e000 0x4000>; + reg = <0xff30e000 0x4000>; tb100-mdscr-manage-tsin; }; tb10x_scr1: tb10x-mdscr@ff312000 { compatible = "abilis,tb100-mdscr"; - reg = <0xFF312000 0x4000>; + reg = <0xff312000 0x4000>; tb100-mdscr-manage-tsin; }; tb10x_wfb: tb10x-wfb@ff319000 { diff --git a/arch/arc/boot/dts/axc001.dtsi b/arch/arc/boot/dts/axc001.dtsi index fdc266504ada..37be3bf03ad6 100644 --- a/arch/arc/boot/dts/axc001.dtsi +++ b/arch/arc/boot/dts/axc001.dtsi @@ -41,7 +41,7 @@ * this GPIO block ORs all interrupts on CPU card (creg,..) * to uplink only 1 IRQ to ARC core intc */ - dw-apb-gpio@0x2000 { + dw-apb-gpio@2000 { compatible = "snps,dw-apb-gpio"; reg = < 0x2000 0x80 >; #address-cells = <1>; @@ -60,7 +60,7 @@ }; }; - debug_uart: dw-apb-uart@0x5000 { + debug_uart: dw-apb-uart@5000 { compatible = "snps,dw-apb-uart"; reg = <0x5000 0x100>; clock-frequency = <33333000>; @@ -88,7 +88,7 @@ * avoid duplicating the MB dtsi file given that IRQ from * this intc to cpu intc are different for axs101 and axs103 */ - mb_intc: dw-apb-ictl@0xe0012000 { + mb_intc: dw-apb-ictl@e0012000 { #interrupt-cells = <1>; compatible = "snps,dw-apb-ictl"; reg = < 0x0 0xe0012000 0x0 0x200 >; diff --git a/arch/arc/boot/dts/axc003.dtsi b/arch/arc/boot/dts/axc003.dtsi index d75d65ddf8e3..effa37536d7a 100644 --- a/arch/arc/boot/dts/axc003.dtsi +++ b/arch/arc/boot/dts/axc003.dtsi @@ -55,7 +55,7 @@ * this GPIO block ORs all interrupts on CPU card (creg,..) * to uplink only 1 IRQ to ARC core intc */ - dw-apb-gpio@0x2000 { + dw-apb-gpio@2000 { compatible = "snps,dw-apb-gpio"; reg = < 0x2000 0x80 >; #address-cells = <1>; @@ -74,7 +74,7 @@ }; }; - debug_uart: dw-apb-uart@0x5000 { + debug_uart: dw-apb-uart@5000 { compatible = "snps,dw-apb-uart"; reg = <0x5000 0x100>; clock-frequency = <33333000>; @@ -102,19 +102,19 @@ * external DMA buffer located outside of IOC aperture. */ axs10x_mb { - ethernet@0x18000 { + ethernet@18000 { dma-coherent; }; - ehci@0x40000 { + ehci@40000 { dma-coherent; }; - ohci@0x60000 { + ohci@60000 { dma-coherent; }; - mmc@0x15000 { + mmc@15000 { dma-coherent; }; }; @@ -132,7 +132,7 @@ * avoid duplicating the MB dtsi file given that IRQ from * this intc to cpu intc are different for axs101 and axs103 */ - mb_intc: dw-apb-ictl@0xe0012000 { + mb_intc: dw-apb-ictl@e0012000 { #interrupt-cells = <1>; compatible = "snps,dw-apb-ictl"; reg = < 0x0 0xe0012000 0x0 0x200 >; @@ -153,7 +153,7 @@ #size-cells = <2>; ranges; /* - * Move frame buffer out of IOC aperture (0x8z-0xAz). + * Move frame buffer out of IOC aperture (0x8z-0xaz). */ frame_buffer: frame_buffer@be000000 { compatible = "shared-dma-pool"; diff --git a/arch/arc/boot/dts/axc003_idu.dtsi b/arch/arc/boot/dts/axc003_idu.dtsi index a05bb737ea63..e401e59f6180 100644 --- a/arch/arc/boot/dts/axc003_idu.dtsi +++ b/arch/arc/boot/dts/axc003_idu.dtsi @@ -62,7 +62,7 @@ * this GPIO block ORs all interrupts on CPU card (creg,..) * to uplink only 1 IRQ to ARC core intc */ - dw-apb-gpio@0x2000 { + dw-apb-gpio@2000 { compatible = "snps,dw-apb-gpio"; reg = < 0x2000 0x80 >; #address-cells = <1>; @@ -81,7 +81,7 @@ }; }; - debug_uart: dw-apb-uart@0x5000 { + debug_uart: dw-apb-uart@5000 { compatible = "snps,dw-apb-uart"; reg = <0x5000 0x100>; clock-frequency = <33333000>; @@ -109,19 +109,19 @@ * external DMA buffer located outside of IOC aperture. */ axs10x_mb { - ethernet@0x18000 { + ethernet@18000 { dma-coherent; }; - ehci@0x40000 { + ehci@40000 { dma-coherent; }; - ohci@0x60000 { + ohci@60000 { dma-coherent; }; - mmc@0x15000 { + mmc@15000 { dma-coherent; }; }; @@ -138,7 +138,7 @@ * avoid duplicating the MB dtsi file given that IRQ from * this intc to cpu intc are different for axs101 and axs103 */ - mb_intc: dw-apb-ictl@0xe0012000 { + mb_intc: dw-apb-ictl@e0012000 { #interrupt-cells = <1>; compatible = "snps,dw-apb-ictl"; reg = < 0x0 0xe0012000 0x0 0x200 >; @@ -159,7 +159,7 @@ #size-cells = <2>; ranges; /* - * Move frame buffer out of IOC aperture (0x8z-0xAz). + * Move frame buffer out of IOC aperture (0x8z-0xaz). */ frame_buffer: frame_buffer@be000000 { compatible = "shared-dma-pool"; diff --git a/arch/arc/boot/dts/axs10x_mb.dtsi b/arch/arc/boot/dts/axs10x_mb.dtsi index 37bafd44e36d..4ead6dc9af2f 100644 --- a/arch/arc/boot/dts/axs10x_mb.dtsi +++ b/arch/arc/boot/dts/axs10x_mb.dtsi @@ -72,7 +72,7 @@ }; }; - gmac: ethernet@0x18000 { + gmac: ethernet@18000 { #interrupt-cells = <1>; compatible = "snps,dwmac"; reg = < 0x18000 0x2000 >; @@ -88,13 +88,13 @@ mac-address = [00 00 00 00 00 00]; /* Filled in by U-Boot */ }; - ehci@0x40000 { + ehci@40000 { compatible = "generic-ehci"; reg = < 0x40000 0x100 >; interrupts = < 8 >; }; - ohci@0x60000 { + ohci@60000 { compatible = "generic-ohci"; reg = < 0x60000 0x100 >; interrupts = < 8 >; @@ -118,7 +118,7 @@ * dw_mci_pltfm_prepare_command() is used in generic platform * code. */ - mmc@0x15000 { + mmc@15000 { compatible = "altr,socfpga-dw-mshc"; reg = < 0x15000 0x400 >; fifo-depth = < 16 >; @@ -129,7 +129,7 @@ bus-width = < 4 >; }; - uart@0x20000 { + uart@20000 { compatible = "snps,dw-apb-uart"; reg = <0x20000 0x100>; clock-frequency = <33333333>; @@ -139,7 +139,7 @@ reg-io-width = <4>; }; - uart@0x21000 { + uart@21000 { compatible = "snps,dw-apb-uart"; reg = <0x21000 0x100>; clock-frequency = <33333333>; @@ -150,7 +150,7 @@ }; /* UART muxed with USB data port (ttyS3) */ - uart@0x22000 { + uart@22000 { compatible = "snps,dw-apb-uart"; reg = <0x22000 0x100>; clock-frequency = <33333333>; @@ -160,7 +160,7 @@ reg-io-width = <4>; }; - i2c@0x1d000 { + i2c@1d000 { compatible = "snps,designware-i2c"; reg = <0x1d000 0x100>; clock-frequency = <400000>; @@ -177,7 +177,7 @@ #sound-dai-cells = <0>; }; - i2c@0x1f000 { + i2c@1f000 { compatible = "snps,designware-i2c"; #address-cells = <1>; #size-cells = <0>; @@ -218,13 +218,13 @@ }; }; - eeprom@0x54{ + eeprom@54{ compatible = "atmel,24c01"; reg = <0x54>; pagesize = <0x8>; }; - eeprom@0x57{ + eeprom@57{ compatible = "atmel,24c04"; reg = <0x57>; pagesize = <0x8>; diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 43f17b51ee89..6c23fa063ced 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -110,12 +110,12 @@ cgu_rst: reset-controller@8a0 { compatible = "snps,hsdk-reset"; #reset-cells = <1>; - reg = <0x8A0 0x4>, <0xFF0 0x4>; + reg = <0x8a0 0x4>, <0xff0 0x4>; }; core_clk: core-clk@0 { compatible = "snps,hsdk-core-pll-clock"; - reg = <0x00 0x10>, <0x14B8 0x4>; + reg = <0x00 0x10>, <0x14b8 0x4>; #clock-cells = <0>; clocks = <&input_clk>; diff --git a/arch/arc/boot/dts/vdk_axc003.dtsi b/arch/arc/boot/dts/vdk_axc003.dtsi index 0fd6ba985b16..84e8766c8ca2 100644 --- a/arch/arc/boot/dts/vdk_axc003.dtsi +++ b/arch/arc/boot/dts/vdk_axc003.dtsi @@ -36,7 +36,7 @@ #interrupt-cells = <1>; }; - debug_uart: dw-apb-uart@0x5000 { + debug_uart: dw-apb-uart@5000 { compatible = "snps,dw-apb-uart"; reg = <0x5000 0x100>; clock-frequency = <2403200>; @@ -49,7 +49,7 @@ }; - mb_intc: dw-apb-ictl@0xe0012000 { + mb_intc: dw-apb-ictl@e0012000 { #interrupt-cells = <1>; compatible = "snps,dw-apb-ictl"; reg = < 0xe0012000 0x200 >; diff --git a/arch/arc/boot/dts/vdk_axc003_idu.dtsi b/arch/arc/boot/dts/vdk_axc003_idu.dtsi index 28956f9a9f3d..eb7e705e8a27 100644 --- a/arch/arc/boot/dts/vdk_axc003_idu.dtsi +++ b/arch/arc/boot/dts/vdk_axc003_idu.dtsi @@ -44,7 +44,7 @@ #interrupt-cells = <1>; }; - debug_uart: dw-apb-uart@0x5000 { + debug_uart: dw-apb-uart@5000 { compatible = "snps,dw-apb-uart"; reg = <0x5000 0x100>; clock-frequency = <2403200>; @@ -57,7 +57,7 @@ }; - mb_intc: dw-apb-ictl@0xe0012000 { + mb_intc: dw-apb-ictl@e0012000 { #interrupt-cells = <1>; compatible = "snps,dw-apb-ictl"; reg = < 0xe0012000 0x200 >; diff --git a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi index 48bb4b4cd234..925d5cc95dbb 100644 --- a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi +++ b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi @@ -36,7 +36,7 @@ }; }; - ethernet@0x18000 { + ethernet@18000 { #interrupt-cells = <1>; compatible = "snps,dwmac"; reg = < 0x18000 0x2000 >; @@ -49,13 +49,13 @@ clock-names = "stmmaceth"; }; - ehci@0x40000 { + ehci@40000 { compatible = "generic-ehci"; reg = < 0x40000 0x100 >; interrupts = < 8 >; }; - uart@0x20000 { + uart@20000 { compatible = "snps,dw-apb-uart"; reg = <0x20000 0x100>; clock-frequency = <2403200>; @@ -65,7 +65,7 @@ reg-io-width = <4>; }; - uart@0x21000 { + uart@21000 { compatible = "snps,dw-apb-uart"; reg = <0x21000 0x100>; clock-frequency = <2403200>; @@ -75,7 +75,7 @@ reg-io-width = <4>; }; - uart@0x22000 { + uart@22000 { compatible = "snps,dw-apb-uart"; reg = <0x22000 0x100>; clock-frequency = <2403200>; @@ -101,7 +101,7 @@ interrupt-names = "arc_ps2_irq"; }; - mmc@0x15000 { + mmc@15000 { compatible = "snps,dw-mshc"; reg = <0x15000 0x400>; fifo-depth = <1024>; @@ -117,11 +117,11 @@ * Embedded Vision subsystem UIO mappings; only relevant for EV VDK * * This node is intentionally put outside of MB above becase - * it maps areas outside of MB's 0xEz-0xFz. + * it maps areas outside of MB's 0xez-0xfz. */ - uio_ev: uio@0xD0000000 { + uio_ev: uio@d0000000 { compatible = "generic-uio"; - reg = <0xD0000000 0x2000 0xD1000000 0x2000 0x90000000 0x10000000 0xC0000000 0x10000000>; + reg = <0xd0000000 0x2000 0xd1000000 0x2000 0x90000000 0x10000000 0xc0000000 0x10000000>; reg-names = "ev_gsa", "ev_ctrl", "ev_shared_mem", "ev_code_mem"; interrupt-parent = <&mb_intc>; interrupts = <23>; -- cgit From 66f7d3709c431d37a0b7f2b59ed00fbf93de1f0d Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Fri, 15 Feb 2019 19:17:33 +0300 Subject: ARC: [plat-hsdk]: Add reset controller handle to manage USB reset DW USB controller on HSDK hangs sometimes after SW reset, so add reset handle to make possible to reset DW USB controller HW. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/hsdk.dts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index 6c23fa063ced..b62088bf03a8 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -200,6 +200,7 @@ compatible = "snps,hsdk-v1.0-ohci", "generic-ohci"; reg = <0x60000 0x100>; interrupts = <15>; + resets = <&cgu_rst HSDK_USB_RESET>; dma-coherent; }; @@ -207,6 +208,7 @@ compatible = "snps,hsdk-v1.0-ehci", "generic-ehci"; reg = <0x40000 0x100>; interrupts = <15>; + resets = <&cgu_rst HSDK_USB_RESET>; dma-coherent; }; -- cgit From 5d4ab8d0960e399e85a55e659b11cfc86a03a776 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Tue, 19 Feb 2019 13:52:26 +0300 Subject: ARC: [plat-hsdk]: Enable AXI DW DMAC support Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/boot/dts/hsdk.dts | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/arch/arc/boot/dts/hsdk.dts b/arch/arc/boot/dts/hsdk.dts index b62088bf03a8..69bc1c9e8e50 100644 --- a/arch/arc/boot/dts/hsdk.dts +++ b/arch/arc/boot/dts/hsdk.dts @@ -167,6 +167,18 @@ #clock-cells = <0>; }; + dmac_core_clk: dmac-core-clk { + compatible = "fixed-clock"; + clock-frequency = <400000000>; + #clock-cells = <0>; + }; + + dmac_cfg_clk: dmac-gpu-cfg-clk { + compatible = "fixed-clock"; + clock-frequency = <200000000>; + #clock-cells = <0>; + }; + gmac: ethernet@8000 { #interrupt-cells = <1>; compatible = "snps,dwmac"; @@ -239,6 +251,21 @@ reg = <0>; }; }; + + dmac: dmac@80000 { + compatible = "snps,axi-dma-1.01a"; + reg = <0x80000 0x400>; + interrupts = <27>; + clocks = <&dmac_core_clk>, <&dmac_cfg_clk>; + clock-names = "core-clk", "cfgr-clk"; + + dma-channels = <4>; + snps,dma-masters = <2>; + snps,data-width = <3>; + snps,block-size = <4096 4096 4096 4096>; + snps,priority = <0 1 2 3>; + snps,axi-max-burst-len = <16>; + }; }; memory@80000000 { -- cgit From 4d1e7918aae59ef504f5170a4f0c7ae82339fcb2 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 30 Jan 2019 19:32:43 +0300 Subject: ARCv2: lib: introduce memcpy optimized for unaligned access Optimise code to use efficient unaligned memory access which is available on ARCv2. This allows us to really simplify memcpy code and speed up the code one and a half times (in case of unaligned source or destination). Don't wire it up yet ! Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/lib/memcpy-archs-unaligned.S | 47 +++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 arch/arc/lib/memcpy-archs-unaligned.S diff --git a/arch/arc/lib/memcpy-archs-unaligned.S b/arch/arc/lib/memcpy-archs-unaligned.S new file mode 100644 index 000000000000..28993a73fdde --- /dev/null +++ b/arch/arc/lib/memcpy-archs-unaligned.S @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * ARCv2 memcpy implementation optimized for unaligned memory access using. + * + * Copyright (C) 2019 Synopsys + * Author: Eugeniy Paltsev + */ + +#include + +#ifdef CONFIG_ARC_HAS_LL64 +# define LOADX(DST,RX) ldd.ab DST, [RX, 8] +# define STOREX(SRC,RX) std.ab SRC, [RX, 8] +# define ZOLSHFT 5 +# define ZOLAND 0x1F +#else +# define LOADX(DST,RX) ld.ab DST, [RX, 4] +# define STOREX(SRC,RX) st.ab SRC, [RX, 4] +# define ZOLSHFT 4 +# define ZOLAND 0xF +#endif + +ENTRY_CFI(memcpy) + mov r3, r0 ; don;t clobber ret val + + lsr.f lp_count, r2, ZOLSHFT + lpnz @.Lcopy32_64bytes + ;; LOOP START + LOADX (r6, r1) + LOADX (r8, r1) + LOADX (r10, r1) + LOADX (r4, r1) + STOREX (r6, r3) + STOREX (r8, r3) + STOREX (r10, r3) + STOREX (r4, r3) +.Lcopy32_64bytes: + + and.f lp_count, r2, ZOLAND ;Last remaining 31 bytes + lpnz @.Lcopyremainingbytes + ;; LOOP START + ldb.ab r5, [r1, 1] + stb.ab r5, [r3, 1] +.Lcopyremainingbytes: + + j [blink] +END_CFI(memcpy) -- cgit From 76551468833cd5c356b1d9ff4bc9393fcf768a59 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Wed, 30 Jan 2019 19:32:41 +0300 Subject: ARCv2: Add explcit unaligned access support (and ability to disable too) As of today we enable unaligned access unconditionally on ARCv2. Do this under a Kconfig option to allow disable it for test, benchmarking etc. Also while at it - Select HAVE_EFFICIENT_UNALIGNED_ACCESS - Although gcc defaults to unaligned access (since GNU 2018.03), add the right toggles for enabling or disabling as appropriate - update bootlog to prints both HW feature status (exists, enabled/disabled) and SW status (used / not used). - wire up the relaxed memcpy for unaligned access Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta [vgupta: squashed patches, handle gcc -mno-unaligned-access quick] --- arch/arc/Kconfig | 9 +++++++++ arch/arc/Makefile | 6 ++++++ arch/arc/include/asm/arcregs.h | 1 + arch/arc/include/asm/irqflags-arcv2.h | 8 +++++++- arch/arc/kernel/head.S | 5 +++++ arch/arc/kernel/intc-arcv2.c | 2 +- arch/arc/kernel/setup.c | 25 ++++++++++++++++--------- arch/arc/kernel/troubleshoot.c | 5 ++++- arch/arc/lib/Makefile | 8 +++++++- 9 files changed, 56 insertions(+), 13 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index d750b302d5ab..95fb11a85566 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -386,6 +386,15 @@ config ARC_HAS_SWAPE if ISA_ARCV2 +config ARC_USE_UNALIGNED_MEM_ACCESS + bool "Enable unaligned access in HW" + default y + select HAVE_EFFICIENT_UNALIGNED_ACCESS + help + The ARC HS architecture supports unaligned memory access + which is disabled by default. Enable unaligned access in + hardware and use software to use it + config ARC_HAS_LL64 bool "Insn: 64bit LDD/STD" help diff --git a/arch/arc/Makefile b/arch/arc/Makefile index df00578c279d..e2b991f75bc5 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -28,6 +28,12 @@ cflags-$(CONFIG_ARC_HAS_SWAPE) += -mswape ifdef CONFIG_ISA_ARCV2 +ifdef CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS +cflags-y += -munaligned-access +else +cflags-y += -mno-unaligned-access +endif + ifndef CONFIG_ARC_HAS_LL64 cflags-y += -mno-ll64 endif diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index a27eafdc8260..0c13317af1d6 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -82,6 +82,7 @@ #define ECR_V_DTLB_MISS 0x05 #define ECR_V_PROTV 0x06 #define ECR_V_TRAP 0x09 +#define ECR_V_MISALIGN 0x0d #endif /* DTLB Miss and Protection Violation Cause Codes */ diff --git a/arch/arc/include/asm/irqflags-arcv2.h b/arch/arc/include/asm/irqflags-arcv2.h index 8a4f77ea3238..e66d0339e1d8 100644 --- a/arch/arc/include/asm/irqflags-arcv2.h +++ b/arch/arc/include/asm/irqflags-arcv2.h @@ -44,7 +44,13 @@ #define ARCV2_IRQ_DEF_PRIO 1 /* seed value for status register */ -#define ISA_INIT_STATUS_BITS (STATUS_IE_MASK | STATUS_AD_MASK | \ +#ifdef CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS +#define __AD_ENB STATUS_AD_MASK +#else +#define __AD_ENB 0 +#endif + +#define ISA_INIT_STATUS_BITS (STATUS_IE_MASK | __AD_ENB | \ (ARCV2_IRQ_DEF_PRIO << 1)) #ifndef __ASSEMBLY__ diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 30e090625916..36774348d23b 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -54,7 +54,12 @@ ; gcc 7.3.1 (ARC GNU 2018.03) onwards generates unaligned access ; by default lr r5, [status32] +#ifdef CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS bset r5, r5, STATUS_AD_BIT +#else + ; Although disabled at reset, bootloader might have enabled it + bclr r5, r5, STATUS_AD_BIT +#endif kflag r5 #endif .endm diff --git a/arch/arc/kernel/intc-arcv2.c b/arch/arc/kernel/intc-arcv2.c index cf18b3e5a934..c0d0124de089 100644 --- a/arch/arc/kernel/intc-arcv2.c +++ b/arch/arc/kernel/intc-arcv2.c @@ -95,7 +95,7 @@ void arc_init_IRQ(void) /* setup status32, don't enable intr yet as kernel doesn't want */ tmp = read_aux_reg(ARC_REG_STATUS32); - tmp |= STATUS_AD_MASK | (ARCV2_IRQ_DEF_PRIO << 1); + tmp |= ARCV2_IRQ_DEF_PRIO << 1; tmp &= ~STATUS_IE_MASK; asm volatile("kflag %0 \n"::"r"(tmp)); } diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 7b2340996cf8..2266e4ee4142 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -263,7 +263,7 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) { struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id]; struct bcr_identity *core = &cpu->core; - int i, n = 0, ua = 0; + int n = 0; FIX_PTR(cpu); @@ -283,16 +283,23 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) IS_AVAIL2(cpu->extn.rtc, "RTC [UP 64-bit] ", CONFIG_ARC_TIMERS_64BIT), IS_AVAIL2(cpu->extn.gfrc, "GFRC [SMP 64-bit] ", CONFIG_ARC_TIMERS_64BIT)); -#ifdef __ARC_UNALIGNED__ - ua = 1; + n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s", + IS_AVAIL2(cpu->isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC), + IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64), + IS_AVAIL2(cpu->isa.unalign, "unalign ", CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS)); + +#if defined(__ARC_UNALIGNED__) && !defined(CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS) + /* + * gcc 7.3.1 (GNU 2018.03) onwards generate unaligned access by default + * but -mno-unaligned-access to disable that didn't work until gcc 8.2.1 + * (GNU 2019.03). So landing here implies the interim period, when + * despite Kconfig being off, gcc is generating unaligned accesses which + * could bomb later on. So better to disallow such broken builds + */ + BUILD_BUG_ON_MSG(1, "gcc doesn't support -mno-unaligned-access"); #endif - n += i = scnprintf(buf + n, len - n, "%s%s%s%s%s%s", - IS_AVAIL2(cpu->isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC), - IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64), - IS_AVAIL1(cpu->isa.unalign, "unalign "), IS_USED_RUN(ua)); - if (i) - n += scnprintf(buf + n, len - n, "\n\t\t: "); + n += scnprintf(buf + n, len - n, "\n\t\t: "); if (cpu->extn_mpy.ver) { if (cpu->extn_mpy.ver <= 0x2) { /* ARCompact */ diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 215f515442e0..b0aa8c028331 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -145,7 +145,8 @@ static void show_ecr_verbose(struct pt_regs *regs) } else if (vec == ECR_V_PROTV) { if (cause_code == ECR_C_PROTV_INST_FETCH) pr_cont("Execute from Non-exec Page\n"); - else if (cause_code == ECR_C_PROTV_MISALIG_DATA) + else if (cause_code == ECR_C_PROTV_MISALIG_DATA && + IS_ENABLED(CONFIG_ISA_ARCOMPACT)) pr_cont("Misaligned r/w from 0x%08lx\n", address); else pr_cont("%s access not allowed on page\n", @@ -161,6 +162,8 @@ static void show_ecr_verbose(struct pt_regs *regs) pr_cont("Bus Error from Data Mem\n"); else pr_cont("Bus Error, check PRM\n"); + } else if (vec == ECR_V_MISALIGN) { + pr_cont("Misaligned r/w from 0x%08lx\n", address); #endif } else if (vec == ECR_V_TRAP) { if (regs->ecr_param == 5) diff --git a/arch/arc/lib/Makefile b/arch/arc/lib/Makefile index b1656d156097..f7537b466b23 100644 --- a/arch/arc/lib/Makefile +++ b/arch/arc/lib/Makefile @@ -8,4 +8,10 @@ lib-y := strchr-700.o strcpy-700.o strlen.o memcmp.o lib-$(CONFIG_ISA_ARCOMPACT) += memcpy-700.o memset.o strcmp.o -lib-$(CONFIG_ISA_ARCV2) += memcpy-archs.o memset-archs.o strcmp-archs.o +lib-$(CONFIG_ISA_ARCV2) += memset-archs.o strcmp-archs.o + +ifdef CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS +lib-$(CONFIG_ISA_ARCV2) +=memcpy-archs-unaligned.o +else +lib-$(CONFIG_ISA_ARCV2) +=memcpy-archs.o +endif -- cgit From fbe025c3eaf5f52060c2820eddb7357363db0d27 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 22 Feb 2019 10:42:44 -0800 Subject: ARC: perf: bpok condition only exists for ARCompact Signed-off-by: Vineet Gupta --- arch/arc/include/asm/perf_event.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arc/include/asm/perf_event.h b/arch/arc/include/asm/perf_event.h index 6958545390f0..9cd7ee4fad39 100644 --- a/arch/arc/include/asm/perf_event.h +++ b/arch/arc/include/asm/perf_event.h @@ -105,10 +105,10 @@ static const char * const arc_pmu_ev_hw_map[] = { [PERF_COUNT_HW_INSTRUCTIONS] = "iall", /* All jump instructions that are taken */ [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = "ijmptak", - [PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */ #ifdef CONFIG_ISA_ARCV2 [PERF_COUNT_HW_BRANCH_MISSES] = "bpmp", #else + [PERF_COUNT_ARC_BPOK] = "bpok", /* NP-NT, PT-T, PNT-NT */ [PERF_COUNT_HW_BRANCH_MISSES] = "bpfail", /* NP-T, PT-NT, PNT-T */ #endif [PERF_COUNT_ARC_LDC] = "imemrdc", /* Instr: mem read cached */ -- cgit From edb64bca50cd736c6894cc6081d5263c007ce005 Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Mon, 25 Feb 2019 20:16:01 +0300 Subject: ARC: u-boot args: check that magic number is correct In case of devboards we really often disable bootloader and load Linux image in memory via JTAG. Even if kernel tries to verify uboot_tag and uboot_arg there is sill a chance that we treat some garbage in registers as valid u-boot arguments in JTAG case. E.g. it is enough to have '1' in r0 to treat any value in r2 as a boot command line. So check that magic number passed from u-boot is correct and drop u-boot arguments otherwise. That helps to reduce the possibility of using garbage as u-boot arguments in JTAG case. We can safely check U-boot magic value (0x0) in linux passed via r1 register as U-boot pass it from the beginning. So there is no backward-compatibility issues. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta --- arch/arc/kernel/head.S | 1 + arch/arc/kernel/setup.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 36774348d23b..8f6e0447dd17 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -111,6 +111,7 @@ ENTRY(stext) ; r2 = pointer to uboot provided cmdline or external DTB in mem ; These are handled later in handle_uboot_args() st r0, [@uboot_tag] + st r1, [@uboot_magic] st r2, [@uboot_arg] ; setup "current" tsk and optionally cache it in dedicated r25 diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 2266e4ee4142..ec2fd231ecd9 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -36,6 +36,7 @@ unsigned int intr_to_DE_cnt; /* Part of U-boot ABI: see head.S */ int __initdata uboot_tag; +int __initdata uboot_magic; char __initdata *uboot_arg; const struct machine_desc *machine_desc; @@ -504,6 +505,8 @@ static inline bool uboot_arg_invalid(unsigned long addr) #define UBOOT_TAG_NONE 0 #define UBOOT_TAG_CMDLINE 1 #define UBOOT_TAG_DTB 2 +/* We always pass 0 as magic from U-boot */ +#define UBOOT_MAGIC_VALUE 0 void __init handle_uboot_args(void) { @@ -518,6 +521,11 @@ void __init handle_uboot_args(void) goto ignore_uboot_args; } + if (uboot_magic != UBOOT_MAGIC_VALUE) { + pr_warn(IGNORE_ARGS "non zero uboot magic\n"); + goto ignore_uboot_args; + } + if (uboot_tag != UBOOT_TAG_NONE && uboot_arg_invalid((unsigned long)uboot_arg)) { pr_warn(IGNORE_ARGS "invalid uboot arg: '%px'\n", uboot_arg); -- cgit From 0728aeb7ead99a9b0dac2f3c92b3752b4e02ff97 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 25 Feb 2019 09:45:38 +0000 Subject: arc: hsdk_defconfig: Enable CONFIG_BLK_DEV_RAM We have now a HSDK device in our kernelci lab, but kernel builded via the hsdk_defconfig lacks ramfs supports, so it cannot boot kernelci jobs yet. So this patch enable CONFIG_BLK_DEV_RAM in hsdk_defconfig. Signed-off-by: Corentin Labbe Acked-by: Alexey Brodkin Signed-off-by: Vineet Gupta --- arch/arc/configs/hsdk_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 87b23b7fb781..aefcf7a4e17a 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -8,6 +8,7 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_BLK_DEV_RAM=y CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set -- cgit From 00a4ae65cc600b008c80429a4fa37ccee21f139e Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 25 Feb 2019 11:24:05 -0800 Subject: ARCv2: boot log: refurbish HS core/release identification HS core names and releases have so far been identified based solely on IDENTIFY.ARCVER field. With the future HS releases this will not be sufficient as same ARCVER 0x54 could be an HS38 or HS48. So rewrite the code to use a new BCR to identify the cores properly. Signed-off-by: Vineet Gupta --- arch/arc/include/asm/arcregs.h | 2 +- arch/arc/kernel/setup.c | 130 +++++++++++++++++++++++------------------ 2 files changed, 75 insertions(+), 57 deletions(-) diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 0c13317af1d6..36e5d124f5ae 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -313,7 +313,7 @@ struct cpuinfo_arc { struct cpuinfo_arc_bpu bpu; struct bcr_identity core; struct bcr_isa_arcv2 isa; - const char *details, *name; + const char *release, *name; unsigned int vec_base; struct cpuinfo_arc_ccm iccm, dccm; struct { diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index ec2fd231ecd9..6bd466892915 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -45,29 +45,24 @@ struct task_struct *_current_task[NR_CPUS]; /* For stack switching */ struct cpuinfo_arc cpuinfo_arc700[NR_CPUS]; -static const struct id_to_str arc_cpu_rel[] = { +static const struct id_to_str arc_legacy_rel[] = { + /* ID.ARCVER, Release */ #ifdef CONFIG_ISA_ARCOMPACT - { 0x34, "R4.10"}, - { 0x35, "R4.11"}, + { 0x34, "R4.10"}, + { 0x35, "R4.11"}, #else - { 0x51, "R2.0" }, - { 0x52, "R2.1" }, - { 0x53, "R3.0" }, - { 0x54, "R3.10a" }, + { 0x51, "R2.0" }, + { 0x52, "R2.1" }, + { 0x53, "R3.0" }, #endif - { 0x00, NULL } + { 0x00, NULL } }; -static const struct id_to_str arc_cpu_nm[] = { -#ifdef CONFIG_ISA_ARCOMPACT - { 0x20, "ARC 600" }, - { 0x30, "ARC 770" }, /* 750 identified seperately */ -#else - { 0x40, "ARC EM" }, - { 0x50, "ARC HS38" }, - { 0x54, "ARC HS48" }, -#endif - { 0x00, "Unknown" } +static const struct id_to_str arc_cpu_rel[] = { + /* UARCH.MAJOR, Release */ + { 0, "R3.10a"}, + { 1, "R3.50a"}, + { 0xFF, NULL } }; static void read_decode_ccm_bcr(struct cpuinfo_arc *cpu) @@ -117,31 +112,72 @@ static void read_decode_ccm_bcr(struct cpuinfo_arc *cpu) } } +static void decode_arc_core(struct cpuinfo_arc *cpu) +{ + struct bcr_uarch_build_arcv2 uarch; + const struct id_to_str *tbl; + + /* + * Up until (including) the first core4 release (0x54) things were + * simple: AUX IDENTITY.ARCVER was sufficient to identify arc family + * and release: 0x50 to 0x53 was HS38, 0x54 was HS48 (dual issue) + */ + + if (cpu->core.family < 0x54) { /* includes arc700 */ + + for (tbl = &arc_legacy_rel[0]; tbl->id != 0; tbl++) { + if (cpu->core.family == tbl->id) { + cpu->release = tbl->str; + break; + } + } + + if (is_isa_arcompact()) + cpu->name = "ARC700"; + else if (tbl->str) + cpu->name = "HS38"; + else + cpu->name = cpu->release = "Unknown"; + + return; + } + + /* + * However the subsequent HS release (same 0x54) allow HS38 or HS48 + * configurations and encode this info in a different BCR. + * The BCR was introduced in 0x54 so can't be read unconditionally. + */ + + READ_BCR(ARC_REG_MICRO_ARCH_BCR, uarch); + + if (uarch.prod == 4) { + cpu->name = "HS48"; + cpu->extn.dual = 1; + + } else { + cpu->name = "HS38"; + } + + for (tbl = &arc_cpu_rel[0]; tbl->id != 0xFF; tbl++) { + if (uarch.maj == tbl->id) { + cpu->release = tbl->str; + break; + } + } +} + static void read_arc_build_cfg_regs(void) { struct bcr_timer timer; struct bcr_generic bcr; struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; - const struct id_to_str *tbl; struct bcr_isa_arcv2 isa; struct bcr_actionpoint ap; FIX_PTR(cpu); READ_BCR(AUX_IDENTITY, cpu->core); - - for (tbl = &arc_cpu_rel[0]; tbl->id != 0; tbl++) { - if (cpu->core.family == tbl->id) { - cpu->details = tbl->str; - break; - } - } - - for (tbl = &arc_cpu_nm[0]; tbl->id != 0; tbl++) { - if ((cpu->core.family & 0xF4) == tbl->id) - break; - } - cpu->name = tbl->str; + decode_arc_core(cpu); READ_BCR(ARC_REG_TIMERS_BCR, timer); cpu->extn.timer0 = timer.t0; @@ -199,30 +235,12 @@ static void read_arc_build_cfg_regs(void) cpu->bpu.num_pred = 2048 << bpu.pte; cpu->bpu.ret_stk = 4 << bpu.rse; - if (cpu->core.family >= 0x54) { - - struct bcr_uarch_build_arcv2 uarch; - - /* - * The first 0x54 core (uarch maj:min 0:1 or 0:2) was - * dual issue only (HS4x). But next uarch rev (1:0) - * allows it be configured for single issue (HS3x) - * Ensure we fiddle with dual issue only on HS4x - */ - READ_BCR(ARC_REG_MICRO_ARCH_BCR, uarch); + /* if dual issue hardware, is it enabled ? */ + if (cpu->extn.dual) { + unsigned int exec_ctrl; - if (uarch.prod == 4) { - unsigned int exec_ctrl; - - /* dual issue hardware always present */ - cpu->extn.dual = 1; - - READ_BCR(AUX_EXEC_CTRL, exec_ctrl); - - /* dual issue hardware enabled ? */ - cpu->extn.dual_enb = !(exec_ctrl & 1); - - } + READ_BCR(AUX_EXEC_CTRL, exec_ctrl); + cpu->extn.dual_enb = !(exec_ctrl & 1); } } @@ -273,7 +291,7 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) core->family, core->cpu_id, core->chip_id); n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s%s%s\n", - cpu_id, cpu->name, cpu->details, + cpu_id, cpu->name, cpu->release, is_isa_arcompact() ? "ARCompact" : "ARCv2", IS_AVAIL1(cpu->isa.be, "[Big-Endian]"), IS_AVAIL3(cpu->extn.dual, cpu->extn.dual_enb, " Dual-Issue ")); -- cgit From 85d6adcbbe6dfc557755543c6c39b497d3032cdc Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 25 Feb 2019 11:56:28 -0800 Subject: ARC: boot log: cut down on verbosity The syscall ABI has long been fixed, so no need to call that out now. Also, there's no need to print really fine details such as norm, barrel-shifter etc. Those are given in a Linux enabled hardware config. So now we print just 1 line for all optional "instruction" related hardware features | | ISA Extn : atomic ll64 unalign mpy[opt 9] div_rem vs. 2 before | |ISA Extn : atomic ll64 unalign | : mpy[opt 9] div_rem norm barrel-shift swap minmax swape Signed-off-by: Vineet Gupta --- arch/arc/include/asm/arcregs.h | 9 ----- arch/arc/kernel/setup.c | 84 ++++++++++++++++-------------------------- 2 files changed, 32 insertions(+), 61 deletions(-) diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 36e5d124f5ae..a7d4be87b2f0 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -168,14 +168,6 @@ struct bcr_mpy { #endif }; -struct bcr_extn_xymem { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int ram_org:2, num_banks:4, bank_sz:4, ver:8; -#else - unsigned int ver:8, bank_sz:4, num_banks:4, ram_org:2; -#endif -}; - struct bcr_iccm_arcompact { #ifdef CONFIG_CPU_BIG_ENDIAN unsigned int base:16, pad:5, sz:3, ver:8; @@ -323,7 +315,6 @@ struct cpuinfo_arc { timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4; } extn; struct bcr_mpy extn_mpy; - struct bcr_extn_xymem extn_xymem; }; extern struct cpuinfo_arc cpuinfo_arc700[]; diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 6bd466892915..15da43d2e416 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -188,16 +188,6 @@ static void read_arc_build_cfg_regs(void) READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy); - cpu->extn.norm = read_aux_reg(ARC_REG_NORM_BCR) > 1 ? 1 : 0; /* 2,3 */ - cpu->extn.barrel = read_aux_reg(ARC_REG_BARREL_BCR) > 1 ? 1 : 0; /* 2,3 */ - cpu->extn.swap = read_aux_reg(ARC_REG_SWAP_BCR) ? 1 : 0; /* 1,3 */ - cpu->extn.crc = read_aux_reg(ARC_REG_CRC_BCR) ? 1 : 0; - cpu->extn.minmax = read_aux_reg(ARC_REG_MIXMAX_BCR) > 1 ? 1 : 0; /* 2 */ - cpu->extn.swape = (cpu->core.family >= 0x34) ? 1 : - IS_ENABLED(CONFIG_ARC_HAS_SWAPE); - - READ_BCR(ARC_REG_XY_MEM_BCR, cpu->extn_xymem); - /* Read CCM BCRs for boot reporting even if not enabled in Kconfig */ read_decode_ccm_bcr(cpu); @@ -282,6 +272,7 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) { struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id]; struct bcr_identity *core = &cpu->core; + char mpy_opt[16]; int n = 0; FIX_PTR(cpu); @@ -302,10 +293,27 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) IS_AVAIL2(cpu->extn.rtc, "RTC [UP 64-bit] ", CONFIG_ARC_TIMERS_64BIT), IS_AVAIL2(cpu->extn.gfrc, "GFRC [SMP 64-bit] ", CONFIG_ARC_TIMERS_64BIT)); - n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s", + if (cpu->extn_mpy.ver) { + if (is_isa_arcompact()) { + scnprintf(mpy_opt, 16, "mpy"); + } else { + + int opt = 2; /* stock MPY/MPYH */ + + if (cpu->extn_mpy.dsp) /* OPT 7-9 */ + opt = cpu->extn_mpy.dsp + 6; + + scnprintf(mpy_opt, 16, "mpy[opt %d] ", opt); + } + } + + n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n", IS_AVAIL2(cpu->isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC), IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64), - IS_AVAIL2(cpu->isa.unalign, "unalign ", CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS)); + IS_AVAIL2(cpu->isa.unalign, "unalign ", CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS), + IS_AVAIL1(cpu->extn_mpy.ver, mpy_opt), + IS_AVAIL1(cpu->isa.div_rem, "div_rem ")); + #if defined(__ARC_UNALIGNED__) && !defined(CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS) /* @@ -318,52 +326,29 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) BUILD_BUG_ON_MSG(1, "gcc doesn't support -mno-unaligned-access"); #endif - n += scnprintf(buf + n, len - n, "\n\t\t: "); - - if (cpu->extn_mpy.ver) { - if (cpu->extn_mpy.ver <= 0x2) { /* ARCompact */ - n += scnprintf(buf + n, len - n, "mpy "); - } else { - int opt = 2; /* stock MPY/MPYH */ - - if (cpu->extn_mpy.dsp) /* OPT 7-9 */ - opt = cpu->extn_mpy.dsp + 6; - - n += scnprintf(buf + n, len - n, "mpy[opt %d] ", opt); - } - } - - n += scnprintf(buf + n, len - n, "%s%s%s%s%s%s%s%s\n", - IS_AVAIL1(cpu->isa.div_rem, "div_rem "), - IS_AVAIL1(cpu->extn.norm, "norm "), - IS_AVAIL1(cpu->extn.barrel, "barrel-shift "), - IS_AVAIL1(cpu->extn.swap, "swap "), - IS_AVAIL1(cpu->extn.minmax, "minmax "), - IS_AVAIL1(cpu->extn.crc, "crc "), - IS_AVAIL2(cpu->extn.swape, "swape", CONFIG_ARC_HAS_SWAPE)); - - if (cpu->bpu.ver) + if (cpu->bpu.ver) { n += scnprintf(buf + n, len - n, "BPU\t\t: %s%s match, cache:%d, Predict Table:%d Return stk: %d", IS_AVAIL1(cpu->bpu.full, "full"), IS_AVAIL1(!cpu->bpu.full, "partial"), cpu->bpu.num_cache, cpu->bpu.num_pred, cpu->bpu.ret_stk); - if (is_isa_arcv2()) { - struct bcr_lpb lpb; + if (is_isa_arcv2()) { + struct bcr_lpb lpb; - READ_BCR(ARC_REG_LPB_BUILD, lpb); - if (lpb.ver) { - unsigned int ctl; - ctl = read_aux_reg(ARC_REG_LPB_CTRL); + READ_BCR(ARC_REG_LPB_BUILD, lpb); + if (lpb.ver) { + unsigned int ctl; + ctl = read_aux_reg(ARC_REG_LPB_CTRL); - n += scnprintf(buf + n, len - n, " Loop Buffer:%d %s", - lpb.entries, - IS_DISABLED_RUN(!ctl)); + n += scnprintf(buf + n, len - n, " Loop Buffer:%d %s", + lpb.entries, + IS_DISABLED_RUN(!ctl)); + } } + n += scnprintf(buf + n, len - n, "\n"); } - n += scnprintf(buf + n, len - n, "\n"); return buf; } @@ -416,11 +401,6 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) } } - n += scnprintf(buf + n, len - n, "OS ABI [v%d]\t: %s\n", - EF_ARC_OSABI_CURRENT >> 8, - EF_ARC_OSABI_CURRENT == EF_ARC_OSABI_V3 ? - "no-legacy-syscalls" : "64-bit data any register aligned"); - return buf; } -- cgit From 9f4984773240cf004cbffb99316af3269bbb5e26 Mon Sep 17 00:00:00 2001 From: Weinan Li Date: Wed, 27 Feb 2019 15:36:58 +0800 Subject: drm/i915/gvt: stop scheduling workload when vgpu is inactive There is one corner case that workload_thread may pick and dispatch one workload of vgpu after it's already deactivated. Below is the scenario: 1. deactive_vgpu got the vgpu_lock, it found pending workload was submitted, then it released the vgpu_lock and wait for vgpu idle. 2. before deactive_vgpu got the vgpu_lock back, workload_thread might pick one new valid workload, then it was blocked by the vgpu_lock. 3. deactive_vgpu got the vgpu_lock again, finished the last processes of deactivating, then release the vgpu_lock. 4. workload_thread got the vgpu_lock, then it will try to dispatch the fetched workload. It's not expected one workload of deactivated vgpu is dispatched. The solution is to add condition check of the vgpu's active flag and stop to schedule when it's inactive. Reviewed-by: Zhenyu Wang Signed-off-by: Weinan Li Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 55bb7885e228..af97df10495e 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -738,7 +738,8 @@ static struct intel_vgpu_workload *pick_next_workload( goto out; } - if (list_empty(workload_q_head(scheduler->current_vgpu, ring_id))) + if (!scheduler->current_vgpu->active || + list_empty(workload_q_head(scheduler->current_vgpu, ring_id))) goto out; /* -- cgit From f552e7bd028f93157b206bd1c7e3e195e919e8c2 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 1 Mar 2019 15:55:04 +0800 Subject: drm/i915/gvt: Don't submit request for error workload dispatch As vGPU shadow ctx is loaded with guest context state, arbitrarily submitting request in error workload dispatch path would cause trouble. So don't try to submit in error path now like in previous code. This is to fix VM failure when GPU hang happens. Fixes: f0e994372518 ("drm/i915/gvt: Fix workload request allocation before request add") Reviewed-by: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index af97df10495e..817d46f25f09 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -677,6 +677,7 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + struct i915_request *rq; int ring_id = workload->ring_id; int ret; @@ -702,6 +703,14 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) ret = prepare_workload(workload); out: + if (ret) { + /* We might still need to add request with + * clean ctx to retire it properly.. + */ + rq = fetch_and_zero(&workload->req); + i915_request_put(rq); + } + if (!IS_ERR_OR_NULL(workload->req)) { gvt_dbg_sched("ring id %d submit workload to i915 %p\n", ring_id, workload->req); -- cgit From bfb1ce1259ca201b50aa4ab5ec7e19266ef46896 Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Tue, 5 Mar 2019 10:59:16 +0200 Subject: habanalabs: fix MMU number of pages calculation The requested allocation size is 64bit, hence the number of requested pages and the total requested size should 64bit as well. This patch fixes all places where these are treated as 32bit. Signed-off-by: Omer Shpigelman Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/debugfs.c | 7 ++++--- drivers/misc/habanalabs/habanalabs.h | 8 ++++---- drivers/misc/habanalabs/memory.c | 29 +++++++++++++++-------------- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/drivers/misc/habanalabs/debugfs.c b/drivers/misc/habanalabs/debugfs.c index a53c12aff6ad..974a87789bd8 100644 --- a/drivers/misc/habanalabs/debugfs.c +++ b/drivers/misc/habanalabs/debugfs.c @@ -232,6 +232,7 @@ static int vm_show(struct seq_file *s, void *data) struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; enum vm_type_t *vm_type; bool once = true; + u64 j; int i; if (!dev_entry->hdev->mmu_enable) @@ -260,7 +261,7 @@ static int vm_show(struct seq_file *s, void *data) } else { phys_pg_pack = hnode->ptr; seq_printf(s, - " 0x%-14llx %-10u %-4u\n", + " 0x%-14llx %-10llu %-4u\n", hnode->vaddr, phys_pg_pack->total_size, phys_pg_pack->handle); } @@ -282,9 +283,9 @@ static int vm_show(struct seq_file *s, void *data) phys_pg_pack->page_size); seq_puts(s, " physical address\n"); seq_puts(s, "---------------------\n"); - for (i = 0 ; i < phys_pg_pack->npages ; i++) { + for (j = 0 ; j < phys_pg_pack->npages ; j++) { seq_printf(s, " 0x%-14llx\n", - phys_pg_pack->pages[i]); + phys_pg_pack->pages[j]); } } spin_unlock(&vm->idr_lock); diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index a7c95e9f9b9a..806f0a5ee4d8 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -793,11 +793,11 @@ struct hl_vm_hash_node { * struct hl_vm_phys_pg_pack - physical page pack. * @vm_type: describes the type of the virtual area descriptor. * @pages: the physical page array. + * @npages: num physical pages in the pack. + * @total_size: total size of all the pages in this list. * @mapping_cnt: number of shared mappings. * @asid: the context related to this list. - * @npages: num physical pages in the pack. * @page_size: size of each page in the pack. - * @total_size: total size of all the pages in this list. * @flags: HL_MEM_* flags related to this list. * @handle: the provided handle related to this list. * @offset: offset from the first page. @@ -807,11 +807,11 @@ struct hl_vm_hash_node { struct hl_vm_phys_pg_pack { enum vm_type_t vm_type; /* must be first */ u64 *pages; + u64 npages; + u64 total_size; atomic_t mapping_cnt; u32 asid; - u32 npages; u32 page_size; - u32 total_size; u32 flags; u32 handle; u32 offset; diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c index 3a12fd1a5274..4f8c968e441a 100644 --- a/drivers/misc/habanalabs/memory.c +++ b/drivers/misc/habanalabs/memory.c @@ -56,9 +56,9 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, struct hl_device *hdev = ctx->hdev; struct hl_vm *vm = &hdev->vm; struct hl_vm_phys_pg_pack *phys_pg_pack; - u64 paddr = 0; - u32 total_size, num_pgs, num_curr_pgs, page_size, page_shift; - int handle, rc, i; + u64 paddr = 0, total_size, num_pgs, i; + u32 num_curr_pgs, page_size, page_shift; + int handle, rc; bool contiguous; num_curr_pgs = 0; @@ -73,7 +73,7 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size); if (!paddr) { dev_err(hdev->dev, - "failed to allocate %u huge contiguous pages\n", + "failed to allocate %llu huge contiguous pages\n", num_pgs); return -ENOMEM; } @@ -267,7 +267,7 @@ static void free_phys_pg_pack(struct hl_device *hdev, struct hl_vm_phys_pg_pack *phys_pg_pack) { struct hl_vm *vm = &hdev->vm; - int i; + u64 i; if (!phys_pg_pack->created_from_userptr) { if (phys_pg_pack->contiguous) { @@ -519,7 +519,7 @@ static inline int add_va_block(struct hl_device *hdev, * - Return the start address of the virtual block */ static u64 get_va_block(struct hl_device *hdev, - struct hl_va_range *va_range, u32 size, u64 hint_addr, + struct hl_va_range *va_range, u64 size, u64 hint_addr, bool is_userptr) { struct hl_vm_va_block *va_block, *new_va_block = NULL; @@ -577,7 +577,8 @@ static u64 get_va_block(struct hl_device *hdev, } if (!new_va_block) { - dev_err(hdev->dev, "no available va block for size %u\n", size); + dev_err(hdev->dev, "no available va block for size %llu\n", + size); goto out; } @@ -648,8 +649,8 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, struct hl_vm_phys_pg_pack *phys_pg_pack; struct scatterlist *sg; dma_addr_t dma_addr; - u64 page_mask; - u32 npages, total_npages, page_size = PAGE_SIZE; + u64 page_mask, total_npages; + u32 npages, page_size = PAGE_SIZE; bool first = true, is_huge_page_opt = true; int rc, i, j; @@ -750,9 +751,9 @@ static int map_phys_page_pack(struct hl_ctx *ctx, u64 vaddr, struct hl_vm_phys_pg_pack *phys_pg_pack) { struct hl_device *hdev = ctx->hdev; - u64 next_vaddr = vaddr, paddr; + u64 next_vaddr = vaddr, paddr, mapped_pg_cnt = 0, i; u32 page_size = phys_pg_pack->page_size; - int i, rc = 0, mapped_pg_cnt = 0; + int rc = 0; for (i = 0 ; i < phys_pg_pack->npages ; i++) { paddr = phys_pg_pack->pages[i]; @@ -764,7 +765,7 @@ static int map_phys_page_pack(struct hl_ctx *ctx, u64 vaddr, rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size); if (rc) { dev_err(hdev->dev, - "map failed for handle %u, npages: %d, mapped: %d", + "map failed for handle %u, npages: %llu, mapped: %llu", phys_pg_pack->handle, phys_pg_pack->npages, mapped_pg_cnt); goto err; @@ -985,10 +986,10 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr) struct hl_vm_hash_node *hnode = NULL; struct hl_userptr *userptr = NULL; enum vm_type_t *vm_type; - u64 next_vaddr; + u64 next_vaddr, i; u32 page_size; bool is_userptr; - int i, rc; + int rc; /* protect from double entrance */ mutex_lock(&ctx->mem_hash_lock); -- cgit From 4eb1d1253ddd95e985c57fc99e9de6802dd2d867 Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Thu, 7 Mar 2019 15:47:19 +0200 Subject: habanalabs: fix bug when mapping very large memory area This patch fixes a bug of allocating a too big memory size with kmalloc, which causes a failure. In case of mapping a large memory block, an array of the relevant physical page addresses is allocated. If there are many pages the array might be too big to allocate with kmalloc, hence changing to kvmalloc. Signed-off-by: Omer Shpigelman Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/memory.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/misc/habanalabs/memory.c b/drivers/misc/habanalabs/memory.c index 4f8c968e441a..ce1fda40a8b8 100644 --- a/drivers/misc/habanalabs/memory.c +++ b/drivers/misc/habanalabs/memory.c @@ -93,7 +93,7 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args, phys_pg_pack->flags = args->flags; phys_pg_pack->contiguous = contiguous; - phys_pg_pack->pages = kcalloc(num_pgs, sizeof(u64), GFP_KERNEL); + phys_pg_pack->pages = kvmalloc_array(num_pgs, sizeof(u64), GFP_KERNEL); if (!phys_pg_pack->pages) { rc = -ENOMEM; goto pages_arr_err; @@ -148,7 +148,7 @@ page_err: gen_pool_free(vm->dram_pg_pool, phys_pg_pack->pages[i], page_size); - kfree(phys_pg_pack->pages); + kvfree(phys_pg_pack->pages); pages_arr_err: kfree(phys_pg_pack); pages_pack_err: @@ -288,7 +288,7 @@ static void free_phys_pg_pack(struct hl_device *hdev, } } - kfree(phys_pg_pack->pages); + kvfree(phys_pg_pack->pages); kfree(phys_pg_pack); } @@ -692,7 +692,8 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx, page_mask = ~(((u64) page_size) - 1); - phys_pg_pack->pages = kcalloc(total_npages, sizeof(u64), GFP_KERNEL); + phys_pg_pack->pages = kvmalloc_array(total_npages, sizeof(u64), + GFP_KERNEL); if (!phys_pg_pack->pages) { rc = -ENOMEM; goto page_pack_arr_mem_err; -- cgit From f650a95b71026f5940804f273f9c36b60634131f Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Wed, 13 Mar 2019 13:36:28 +0200 Subject: habanalabs: complete user context cleanup before hard reset This patch fixes a bug which led to a crash during hard reset flow. Before a hard reset is executed, we wait a few seconds for the user context cleanup to complete. If it wasn't completed, we kill the user process and move on to the reset flow. Upon killing the user process, the context cleanup flow begins and may take a while due to MMU unmaps. Meanwhile, in the driver reset flow, we change the PCI DRAM bar location which can interfere with the MMU that uses the bar. If the context cleanup flow didn't finish quickly, a crash may occur due to PCI DRAM bar mislocation during the MMU unmap. Hence adding a wait between killing the user process and the start of the reset flow. Signed-off-by: Omer Shpigelman Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/device.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index de46aa6ed154..93d67983ddba 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -11,6 +11,8 @@ #include #include +#define HL_PLDM_PENDING_RESET_PER_SEC (HL_PENDING_RESET_PER_SEC * 10) + bool hl_device_disabled_or_in_reset(struct hl_device *hdev) { if ((hdev->disabled) || (atomic_read(&hdev->in_reset))) @@ -462,9 +464,16 @@ static void hl_device_hard_reset_pending(struct work_struct *work) struct hl_device_reset_work *device_reset_work = container_of(work, struct hl_device_reset_work, reset_work); struct hl_device *hdev = device_reset_work->hdev; - u16 pending_cnt = HL_PENDING_RESET_PER_SEC; + u16 pending_total, pending_cnt; struct task_struct *task = NULL; + if (hdev->pldm) + pending_total = HL_PLDM_PENDING_RESET_PER_SEC; + else + pending_total = HL_PENDING_RESET_PER_SEC; + + pending_cnt = pending_total; + /* Flush all processes that are inside hl_open */ mutex_lock(&hdev->fd_open_cnt_lock); @@ -489,6 +498,19 @@ static void hl_device_hard_reset_pending(struct work_struct *work) } } + pending_cnt = pending_total; + + while ((atomic_read(&hdev->fd_open_cnt)) && (pending_cnt)) { + + pending_cnt--; + + ssleep(1); + } + + if (atomic_read(&hdev->fd_open_cnt)) + dev_crit(hdev->dev, + "Going to hard reset with open user contexts\n"); + mutex_unlock(&hdev->fd_open_cnt_lock); hl_device_reset(hdev, true, true); -- cgit From d12a5e2458d49aad2b7d25766794eec95ae8f6f1 Mon Sep 17 00:00:00 2001 From: Omer Shpigelman Date: Thu, 14 Mar 2019 16:54:45 +0200 Subject: habanalabs: fix mapping with page size bigger than 4KB This patch fixes the mapping of virtual address to physical addresses on architectures where PAGE_SIZE is bigger than 4KB. The break down to the device page size was done only for the virtual address while it should have been done for the physical address as well. As a result virtual addresses were mapped to wrong physical address. The fix is to apply the break down for the physical addresses as well in order to get correct mappings. Signed-off-by: Omer Shpigelman Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/mmu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/misc/habanalabs/mmu.c b/drivers/misc/habanalabs/mmu.c index 2f2e99cb2743..3a5a2cec8305 100644 --- a/drivers/misc/habanalabs/mmu.c +++ b/drivers/misc/habanalabs/mmu.c @@ -832,7 +832,7 @@ err: int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size) { struct hl_device *hdev = ctx->hdev; - u64 real_virt_addr; + u64 real_virt_addr, real_phys_addr; u32 real_page_size, npages; int i, rc, mapped_cnt = 0; @@ -857,14 +857,16 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size) npages = page_size / real_page_size; real_virt_addr = virt_addr; + real_phys_addr = phys_addr; for (i = 0 ; i < npages ; i++) { - rc = _hl_mmu_map(ctx, real_virt_addr, phys_addr, + rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr, real_page_size); if (rc) goto err; real_virt_addr += real_page_size; + real_phys_addr += real_page_size; mapped_cnt++; } -- cgit From cbaa99ed1b697072f089693a7fe2d649d08bf317 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 3 Mar 2019 15:13:15 +0200 Subject: habanalabs: perform accounting for active CS This patch adds accounting for active CS. Active means that the CS was submitted to the H/W queues and was not completed yet. This is necessary to support suspend operation. Because the device will be reset upon suspend, we can only suspend after all active CS have been completed. Hence, we need to perform accounting on their number. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/command_submission.c | 6 ++++++ drivers/misc/habanalabs/device.c | 1 + drivers/misc/habanalabs/habanalabs.h | 13 ++++++++----- drivers/misc/habanalabs/hw_queue.c | 5 +++-- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/drivers/misc/habanalabs/command_submission.c b/drivers/misc/habanalabs/command_submission.c index 3525236ed8d9..19c84214a7ea 100644 --- a/drivers/misc/habanalabs/command_submission.c +++ b/drivers/misc/habanalabs/command_submission.c @@ -179,6 +179,12 @@ static void cs_do_release(struct kref *ref) /* We also need to update CI for internal queues */ if (cs->submitted) { + int cs_cnt = atomic_dec_return(&hdev->cs_active_cnt); + + WARN_ONCE((cs_cnt < 0), + "hl%d: error in CS active cnt %d\n", + hdev->id, cs_cnt); + hl_int_hw_queue_update_ci(cs); spin_lock(&hdev->hw_queues_mirror_lock); diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index 93d67983ddba..470d8005b50e 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -218,6 +218,7 @@ static int device_early_init(struct hl_device *hdev) spin_lock_init(&hdev->hw_queues_mirror_lock); atomic_set(&hdev->in_reset, 0); atomic_set(&hdev->fd_open_cnt, 0); + atomic_set(&hdev->cs_active_cnt, 0); return 0; diff --git a/drivers/misc/habanalabs/habanalabs.h b/drivers/misc/habanalabs/habanalabs.h index 806f0a5ee4d8..a8ee52c880cd 100644 --- a/drivers/misc/habanalabs/habanalabs.h +++ b/drivers/misc/habanalabs/habanalabs.h @@ -1056,13 +1056,15 @@ struct hl_device_reset_work { * @cb_pool_lock: protects the CB pool. * @user_ctx: current user context executing. * @dram_used_mem: current DRAM memory consumption. - * @in_reset: is device in reset flow. - * @curr_pll_profile: current PLL profile. - * @fd_open_cnt: number of open user processes. * @timeout_jiffies: device CS timeout value. * @max_power: the max power of the device, as configured by the sysadmin. This * value is saved so in case of hard-reset, KMD will restore this * value and update the F/W after the re-initialization + * @in_reset: is device in reset flow. + * @curr_pll_profile: current PLL profile. + * @fd_open_cnt: number of open user processes. + * @cs_active_cnt: number of active command submissions on this device (active + * means already in H/W queues) * @major: habanalabs KMD major. * @high_pll: high PLL profile frequency. * @soft_reset_cnt: number of soft reset since KMD loading. @@ -1128,11 +1130,12 @@ struct hl_device { struct hl_ctx *user_ctx; atomic64_t dram_used_mem; + u64 timeout_jiffies; + u64 max_power; atomic_t in_reset; atomic_t curr_pll_profile; atomic_t fd_open_cnt; - u64 timeout_jiffies; - u64 max_power; + atomic_t cs_active_cnt; u32 major; u32 high_pll; u32 soft_reset_cnt; diff --git a/drivers/misc/habanalabs/hw_queue.c b/drivers/misc/habanalabs/hw_queue.c index 67bece26417c..ef3bb6951360 100644 --- a/drivers/misc/habanalabs/hw_queue.c +++ b/drivers/misc/habanalabs/hw_queue.c @@ -370,12 +370,13 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs) spin_unlock(&hdev->hw_queues_mirror_lock); } - list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) { + atomic_inc(&hdev->cs_active_cnt); + + list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) if (job->ext_queue) ext_hw_queue_schedule_job(job); else int_hw_queue_schedule_job(job); - } cs->submitted = true; -- cgit From 7cb5101ee0107376f8eace195a138f99174e80ff Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 3 Mar 2019 22:29:20 +0200 Subject: habanalabs: prevent host crash during suspend/resume This patch fixes the implementation of suspend/resume of the device so that upon resume of the device, the host won't crash due to PCI completion timeout. Upon suspend, the device is being reset due to PERST. Therefore, upon resume, the driver must initialize the PCI controller as if the driver was loaded. If the controller is not initialized and the device tries to access the device through the PCI bars, the host will crash with PCI completion timeout error. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/device.c | 46 ++++++++++++++++++++++++--- drivers/misc/habanalabs/goya/goya.c | 63 +------------------------------------ 2 files changed, 43 insertions(+), 66 deletions(-) diff --git a/drivers/misc/habanalabs/device.c b/drivers/misc/habanalabs/device.c index 470d8005b50e..77d51be66c7e 100644 --- a/drivers/misc/habanalabs/device.c +++ b/drivers/misc/habanalabs/device.c @@ -416,6 +416,27 @@ int hl_device_suspend(struct hl_device *hdev) pci_save_state(hdev->pdev); + /* Block future CS/VM/JOB completion operations */ + rc = atomic_cmpxchg(&hdev->in_reset, 0, 1); + if (rc) { + dev_err(hdev->dev, "Can't suspend while in reset\n"); + return -EIO; + } + + /* This blocks all other stuff that is not blocked by in_reset */ + hdev->disabled = true; + + /* + * Flush anyone that is inside the critical section of enqueue + * jobs to the H/W + */ + hdev->asic_funcs->hw_queues_lock(hdev); + hdev->asic_funcs->hw_queues_unlock(hdev); + + /* Flush processes that are sending message to CPU */ + mutex_lock(&hdev->send_cpu_message_lock); + mutex_unlock(&hdev->send_cpu_message_lock); + rc = hdev->asic_funcs->suspend(hdev); if (rc) dev_err(hdev->dev, @@ -443,21 +464,38 @@ int hl_device_resume(struct hl_device *hdev) pci_set_power_state(hdev->pdev, PCI_D0); pci_restore_state(hdev->pdev); - rc = pci_enable_device(hdev->pdev); + rc = pci_enable_device_mem(hdev->pdev); if (rc) { dev_err(hdev->dev, "Failed to enable PCI device in resume\n"); return rc; } + pci_set_master(hdev->pdev); + rc = hdev->asic_funcs->resume(hdev); if (rc) { - dev_err(hdev->dev, - "Failed to enable PCI access from device CPU\n"); - return rc; + dev_err(hdev->dev, "Failed to resume device after suspend\n"); + goto disable_device; + } + + + hdev->disabled = false; + atomic_set(&hdev->in_reset, 0); + + rc = hl_device_reset(hdev, true, false); + if (rc) { + dev_err(hdev->dev, "Failed to reset device during resume\n"); + goto disable_device; } return 0; + +disable_device: + pci_clear_master(hdev->pdev); + pci_disable_device(hdev->pdev); + + return rc; } static void hl_device_hard_reset_pending(struct work_struct *work) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 238dd57c541b..538d8d59d9dc 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -1201,15 +1201,6 @@ static int goya_stop_external_queues(struct hl_device *hdev) return retval; } -static void goya_resume_external_queues(struct hl_device *hdev) -{ - WREG32(mmDMA_QM_0_GLBL_CFG1, 0); - WREG32(mmDMA_QM_1_GLBL_CFG1, 0); - WREG32(mmDMA_QM_2_GLBL_CFG1, 0); - WREG32(mmDMA_QM_3_GLBL_CFG1, 0); - WREG32(mmDMA_QM_4_GLBL_CFG1, 0); -} - /* * goya_init_cpu_queues - Initialize PQ/CQ/EQ of CPU * @@ -2178,36 +2169,6 @@ static int goya_stop_internal_queues(struct hl_device *hdev) return retval; } -static void goya_resume_internal_queues(struct hl_device *hdev) -{ - WREG32(mmMME_QM_GLBL_CFG1, 0); - WREG32(mmMME_CMDQ_GLBL_CFG1, 0); - - WREG32(mmTPC0_QM_GLBL_CFG1, 0); - WREG32(mmTPC0_CMDQ_GLBL_CFG1, 0); - - WREG32(mmTPC1_QM_GLBL_CFG1, 0); - WREG32(mmTPC1_CMDQ_GLBL_CFG1, 0); - - WREG32(mmTPC2_QM_GLBL_CFG1, 0); - WREG32(mmTPC2_CMDQ_GLBL_CFG1, 0); - - WREG32(mmTPC3_QM_GLBL_CFG1, 0); - WREG32(mmTPC3_CMDQ_GLBL_CFG1, 0); - - WREG32(mmTPC4_QM_GLBL_CFG1, 0); - WREG32(mmTPC4_CMDQ_GLBL_CFG1, 0); - - WREG32(mmTPC5_QM_GLBL_CFG1, 0); - WREG32(mmTPC5_CMDQ_GLBL_CFG1, 0); - - WREG32(mmTPC6_QM_GLBL_CFG1, 0); - WREG32(mmTPC6_CMDQ_GLBL_CFG1, 0); - - WREG32(mmTPC7_QM_GLBL_CFG1, 0); - WREG32(mmTPC7_CMDQ_GLBL_CFG1, 0); -} - static void goya_dma_stall(struct hl_device *hdev) { WREG32(mmDMA_QM_0_GLBL_CFG1, 1 << DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT); @@ -2905,20 +2866,6 @@ int goya_suspend(struct hl_device *hdev) { int rc; - rc = goya_stop_internal_queues(hdev); - - if (rc) { - dev_err(hdev->dev, "failed to stop internal queues\n"); - return rc; - } - - rc = goya_stop_external_queues(hdev); - - if (rc) { - dev_err(hdev->dev, "failed to stop external queues\n"); - return rc; - } - rc = goya_send_pci_access_msg(hdev, ARMCP_PACKET_DISABLE_PCI_ACCESS); if (rc) dev_err(hdev->dev, "Failed to disable PCI access from CPU\n"); @@ -2928,15 +2875,7 @@ int goya_suspend(struct hl_device *hdev) int goya_resume(struct hl_device *hdev) { - int rc; - - goya_resume_external_queues(hdev); - goya_resume_internal_queues(hdev); - - rc = goya_send_pci_access_msg(hdev, ARMCP_PACKET_ENABLE_PCI_ACCESS); - if (rc) - dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); - return rc; + return goya_init_iatu(hdev); } static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma, -- cgit From 7c22278edd0a931c565a8511dfc1bc57ffbb9166 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Sun, 3 Mar 2019 10:23:29 +0200 Subject: habanalabs: cast to expected type This patch fix the following sparse warning: drivers/misc/habanalabs/goya/goya.c:3646:14: warning: incorrect type in assignment (different address spaces) drivers/misc/habanalabs/goya/goya.c:3646:14: expected void *base drivers/misc/habanalabs/goya/goya.c:3646:14: got void [noderef] * Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/goya/goya.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 538d8d59d9dc..ea979ebd62fb 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -3009,7 +3009,7 @@ void *goya_get_int_queue_base(struct hl_device *hdev, u32 queue_id, *dma_handle = hdev->asic_prop.sram_base_address; - base = hdev->pcie_bar[SRAM_CFG_BAR_ID]; + base = (void *) hdev->pcie_bar[SRAM_CFG_BAR_ID]; switch (queue_id) { case GOYA_QUEUE_ID_MME: -- cgit From 1e18d5e6731d674fee0bb4b66f5ea61e504452a3 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 1 Mar 2019 15:04:12 +0800 Subject: drm/i915/gvt: Only assign ppgtt root at dispatch time This moves ppgtt root hook out of scan and shadow function, as it's only required at dispatch time. Also make sure this checks against shadow mm to be ready, otherwise bail to fail earlier. Reviewed-by: Xiong Zhang Cc: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/scheduler.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index 817d46f25f09..0f2f2c43c0e0 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -345,7 +345,7 @@ static int set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload, int i = 0; if (mm->type != INTEL_GVT_MM_PPGTT || !mm->ppgtt_mm.shadowed) - return -1; + return -EINVAL; if (mm->ppgtt_mm.root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY) { px_dma(&ppgtt->pml4) = mm->ppgtt_mm.shadow_pdps[0]; @@ -409,12 +409,6 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload) if (workload->shadow) return 0; - ret = set_context_ppgtt_from_shadow(workload, shadow_ctx); - if (ret < 0) { - gvt_vgpu_err("workload shadow ppgtt isn't ready\n"); - return ret; - } - /* pin shadow context by gvt even the shadow context will be pinned * when i915 alloc request. That is because gvt will update the guest * context from shadow context when workload is completed, and at that @@ -677,6 +671,8 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) { struct intel_vgpu *vgpu = workload->vgpu; struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + struct intel_vgpu_submission *s = &vgpu->submission; + struct i915_gem_context *shadow_ctx = s->shadow_ctx; struct i915_request *rq; int ring_id = workload->ring_id; int ret; @@ -687,6 +683,12 @@ static int dispatch_workload(struct intel_vgpu_workload *workload) mutex_lock(&vgpu->vgpu_lock); mutex_lock(&dev_priv->drm.struct_mutex); + ret = set_context_ppgtt_from_shadow(workload, shadow_ctx); + if (ret < 0) { + gvt_vgpu_err("workload shadow ppgtt isn't ready\n"); + goto err_req; + } + ret = intel_gvt_workload_req_alloc(workload); if (ret) goto err_req; -- cgit From 72aabfb862e40ee83c136c4f87877c207e6859b7 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Fri, 1 Mar 2019 15:04:13 +0800 Subject: drm/i915/gvt: Add mutual lock for ppgtt mm LRU list This adds mutex to guard against update of global ppgtt mm LRU list. To resolve error found as below warning. [73130.012162] ------------[ cut here ]------------ [73130.012168] list_add corruption. prev->next should be next (ffff995f970cca50), but was 0000000000000000. (prev=ffff995f0dc5bdf8). [73130.012181] WARNING: CPU: 3 PID: 82 at lib/list_debug.c:28 __list_add_valid+0x4d/0x70 [73130.012183] Modules linked in: btrfs(E) xor(E) zstd_decompress(E) zstd_compress(E) raid6_pq(E) dm_mod(E) kvmgt(E) fuse(E) xt_addrtype(E) nft_compat(E) xt_conntrack(E) nf_nat(E) nf_conntrack(E) nf_defrag_ipv6(E) nf_defrag_ipv4(E) libcrc32c(E) br_netfilter(E) bridge(E) stp(E) llc(E) overlay(E) devlink(E) nf_tables(E) nfnetlink(E) loop(E) x86_pkg_temp_thermal(E) intel_powerclamp(E) coretemp(E) crct10dif_pclmul(E) crc32_pclmul(E) ghash_clmulni_intel(E) mei_me(E) aesni_intel(E) aes_x86_64(E) crypto_simd(E) cryptd(E) glue_helper(E) intel_cstate(E) intel_uncore(E) mei(E) intel_pch_thermal(E) intel_rapl_perf(E) pcspkr(E) iTCO_wdt(E) iTCO_vendor_support(E) idma64(E) sg(E) virt_dma(E) acpi_pad(E) evdev(E) binfmt_misc(E) ip_tables(E) x_tables(E) ipv6(E) autofs4(E) hid_generic(E) usbhid(E) hid(E) ext4(E) crc32c_generic(E) crc16(E) mbcache(E) jbd2(E) fscrypto(E) xhci_pci(E) sdhci_pci(E) cqhci(E) intel_lpss_pci(E) intel_lpss(E) crc32c_intel(E) xhci_hcd(E) sdhci(E) i2c_i801(E) e1000e(E) mmc_core(E) [73130.012218] ptp(E) pps_core(E) usbcore(E) mfd_core(E) sd_mod(E) fan(E) thermal(E) [73130.012227] CPU: 3 PID: 82 Comm: gvt workload 0 Tainted: G W E 5.0.0-rc7-staging-190226+ #282 [73130.012228] Hardware name: /NUC6i5SYB, BIOS SYSKLi35.86A.0039.2016.0316.1747 03/16/2016 [73130.012232] RIP: 0010:__list_add_valid+0x4d/0x70 [73130.012234] Code: c3 48 89 d1 48 c7 c7 e0 82 91 bb 48 89 c2 e8 44 8a cc ff 0f 0b 31 c0 c3 48 89 c1 4c 89 c6 48 c7 c7 30 83 91 bb e8 2d 8a cc ff <0f> 0b 31 c0 c3 48 89 f2 4c 89 c1 48 89 fe 48 c7 c7 80 83 91 bb e8 [73130.012236] RSP: 0018:ffffa4924107fdd0 EFLAGS: 00010286 [73130.012238] RAX: 0000000000000000 RBX: ffff995d8a5ccf00 RCX: 0000000000000006 [73130.012240] RDX: 0000000000000007 RSI: 0000000000000086 RDI: ffff995faad96680 [73130.012241] RBP: 0000000000000000 R08: 0000000000213a28 R09: 0000000000000084 [73130.012243] R10: 0000000000000000 R11: ffffa4924107fc70 R12: ffff995d8a5ccf78 [73130.012245] R13: ffff995f970c8000 R14: ffff995f0dc5bdf8 R15: ffff995f970cca50 [73130.012247] FS: 0000000000000000(0000) GS:ffff995faad80000(0000) knlGS:0000000000000000 [73130.012249] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [73130.012250] CR2: 00000222e1891000 CR3: 0000000116848002 CR4: 00000000003626e0 [73130.012252] Call Trace: [73130.012258] intel_vgpu_pin_mm+0x7a/0xa0 [73130.012262] workload_thread+0x683/0x12a0 [73130.012266] ? do_wait_intr_irq+0xb0/0xb0 [73130.012269] ? finish_wait+0x80/0x80 [73130.012271] ? intel_vgpu_clean_workloads+0x110/0x110 [73130.012274] kthread+0x116/0x130 [73130.012276] ? kthread_bind+0x30/0x30 [73130.012280] ret_from_fork+0x35/0x40 [73130.012285] WARNING: CPU: 3 PID: 82 at lib/list_debug.c:28 __list_add_valid+0x4d/0x70 [73130.012286] ---[ end trace 458a2e792eec21c0 ]--- v2: - simplify lock handling Reviewed-by: Xiong Zhang Cc: Xiong Zhang Signed-off-by: Zhenyu Wang --- drivers/gpu/drm/i915/gvt/gtt.c | 14 +++++++++++++- drivers/gpu/drm/i915/gvt/gtt.h | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index c7103dd2d8d5..d7052ab7908c 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1882,7 +1882,11 @@ struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu, } list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head); + + mutex_lock(&gvt->gtt.ppgtt_mm_lock); list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head); + mutex_unlock(&gvt->gtt.ppgtt_mm_lock); + return mm; } @@ -1967,9 +1971,10 @@ int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm) if (ret) return ret; + mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock); list_move_tail(&mm->ppgtt_mm.lru_list, &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head); - + mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock); } return 0; @@ -1980,6 +1985,8 @@ static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt) struct intel_vgpu_mm *mm; struct list_head *pos, *n; + mutex_lock(&gvt->gtt.ppgtt_mm_lock); + list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) { mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list); @@ -1987,9 +1994,11 @@ static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt) continue; list_del_init(&mm->ppgtt_mm.lru_list); + mutex_unlock(&gvt->gtt.ppgtt_mm_lock); invalidate_ppgtt_mm(mm); return 1; } + mutex_unlock(&gvt->gtt.ppgtt_mm_lock); return 0; } @@ -2659,6 +2668,7 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt) } } INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head); + mutex_init(&gvt->gtt.ppgtt_mm_lock); return 0; } @@ -2699,7 +2709,9 @@ void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu) list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) { mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list); if (mm->type == INTEL_GVT_MM_PPGTT) { + mutex_lock(&vgpu->gvt->gtt.ppgtt_mm_lock); list_del_init(&mm->ppgtt_mm.lru_list); + mutex_unlock(&vgpu->gvt->gtt.ppgtt_mm_lock); if (mm->ppgtt_mm.shadowed) invalidate_ppgtt_mm(mm); } diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index d8cb04cc946d..edb610dc5d86 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -88,6 +88,7 @@ struct intel_gvt_gtt { void (*mm_free_page_table)(struct intel_vgpu_mm *mm); struct list_head oos_page_use_list_head; struct list_head oos_page_free_list_head; + struct mutex ppgtt_mm_lock; struct list_head ppgtt_mm_lru_list_head; struct page *scratch_page; -- cgit From 7d3a5eb78e3614b025389d2cd89d6d85e91f5549 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Mar 2019 21:02:18 +0100 Subject: irqchip/imx-irqsteer: Fix of_property_read_u32() error handling gcc points out that irqs_num is not initialized when of_property_read_u32() is an empty stub function: Included from drivers/irqchip/irq-imx-irqsteer.c:7: drivers/irqchip/irq-imx-irqsteer.c: In function 'imx_irqsteer_probe': include/uapi/linux/kernel.h:13:49: error: 'irqs_num' may be used uninitialized in this function [-Werror=maybe-uninitialized] The same can actually happen with CONFIG_OF=y as well, though we don't get a warning then. Add error checking here that lets the code deal with missing or invalid properties as well as avoid the warning. Fixes: 28528fca4908 ("irqchip/imx-irqsteer: Add multi output interrupts support") Signed-off-by: Arnd Bergmann Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-imx-irqsteer.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-imx-irqsteer.c b/drivers/irqchip/irq-imx-irqsteer.c index d1098f4da6a4..88df3d00052c 100644 --- a/drivers/irqchip/irq-imx-irqsteer.c +++ b/drivers/irqchip/irq-imx-irqsteer.c @@ -169,8 +169,12 @@ static int imx_irqsteer_probe(struct platform_device *pdev) raw_spin_lock_init(&data->lock); - of_property_read_u32(np, "fsl,num-irqs", &irqs_num); - of_property_read_u32(np, "fsl,channel", &data->channel); + ret = of_property_read_u32(np, "fsl,num-irqs", &irqs_num); + if (ret) + return ret; + ret = of_property_read_u32(np, "fsl,channel", &data->channel); + if (ret) + return ret; /* * There is one output irq for each group of 64 inputs. -- cgit From 7f3d6c8e8f5f041c86c0a9f64e4b4ab7c6373ac2 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 20 Feb 2019 10:19:50 -0800 Subject: soc: bcm: bcm2835-pm: Fix PM_IMAGE_PERI power domain support. We don't have ASB master/slave regs for this domain, so just skip that step. Signed-off-by: Eric Anholt Fixes: 670c672608a1 ("soc: bcm: bcm2835-pm: Add support for power domains under a new binding.") --- drivers/soc/bcm/bcm2835-power.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/soc/bcm/bcm2835-power.c b/drivers/soc/bcm/bcm2835-power.c index 48412957ec7a..4a1b99b773c0 100644 --- a/drivers/soc/bcm/bcm2835-power.c +++ b/drivers/soc/bcm/bcm2835-power.c @@ -150,7 +150,12 @@ struct bcm2835_power { static int bcm2835_asb_enable(struct bcm2835_power *power, u32 reg) { - u64 start = ktime_get_ns(); + u64 start; + + if (!reg) + return 0; + + start = ktime_get_ns(); /* Enable the module's async AXI bridges. */ ASB_WRITE(reg, ASB_READ(reg) & ~ASB_REQ_STOP); @@ -165,7 +170,12 @@ static int bcm2835_asb_enable(struct bcm2835_power *power, u32 reg) static int bcm2835_asb_disable(struct bcm2835_power *power, u32 reg) { - u64 start = ktime_get_ns(); + u64 start; + + if (!reg) + return 0; + + start = ktime_get_ns(); /* Enable the module's async AXI bridges. */ ASB_WRITE(reg, ASB_READ(reg) | ASB_REQ_STOP); -- cgit From 4deabfae643d8852c643664d9088a647abfaa5d0 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 20 Feb 2019 10:19:51 -0800 Subject: soc: bcm: bcm2835-pm: Fix error paths of initialization. The clock driver may probe after ours and so we need to pass the -EPROBE_DEFER out. Fix the other error path while we're here. v2: Use dom->name instead of dom->gov as the flag for initialized domains, since we aren't setting up a governor. Make sure to clear ->clk when no clk is present in the DT. Signed-off-by: Eric Anholt Fixes: 670c672608a1 ("soc: bcm: bcm2835-pm: Add support for power domains under a new binding.") --- drivers/soc/bcm/bcm2835-power.c | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/drivers/soc/bcm/bcm2835-power.c b/drivers/soc/bcm/bcm2835-power.c index 4a1b99b773c0..241c4ed80899 100644 --- a/drivers/soc/bcm/bcm2835-power.c +++ b/drivers/soc/bcm/bcm2835-power.c @@ -485,7 +485,7 @@ static int bcm2835_power_pd_power_off(struct generic_pm_domain *domain) } } -static void +static int bcm2835_init_power_domain(struct bcm2835_power *power, int pd_xlate_index, const char *name) { @@ -493,6 +493,17 @@ bcm2835_init_power_domain(struct bcm2835_power *power, struct bcm2835_power_domain *dom = &power->domains[pd_xlate_index]; dom->clk = devm_clk_get(dev->parent, name); + if (IS_ERR(dom->clk)) { + int ret = PTR_ERR(dom->clk); + + if (ret == -EPROBE_DEFER) + return ret; + + /* Some domains don't have a clk, so make sure that we + * don't deref an error pointer later. + */ + dom->clk = NULL; + } dom->base.name = name; dom->base.power_on = bcm2835_power_pd_power_on; @@ -505,6 +516,8 @@ bcm2835_init_power_domain(struct bcm2835_power *power, pm_genpd_init(&dom->base, NULL, true); power->pd_xlate.domains[pd_xlate_index] = &dom->base; + + return 0; } /** bcm2835_reset_reset - Resets a block that has a reset line in the @@ -602,7 +615,7 @@ static int bcm2835_power_probe(struct platform_device *pdev) { BCM2835_POWER_DOMAIN_IMAGE_PERI, BCM2835_POWER_DOMAIN_CAM0 }, { BCM2835_POWER_DOMAIN_IMAGE_PERI, BCM2835_POWER_DOMAIN_CAM1 }, }; - int ret, i; + int ret = 0, i; u32 id; power = devm_kzalloc(dev, sizeof(*power), GFP_KERNEL); @@ -629,8 +642,11 @@ static int bcm2835_power_probe(struct platform_device *pdev) power->pd_xlate.num_domains = ARRAY_SIZE(power_domain_names); - for (i = 0; i < ARRAY_SIZE(power_domain_names); i++) - bcm2835_init_power_domain(power, i, power_domain_names[i]); + for (i = 0; i < ARRAY_SIZE(power_domain_names); i++) { + ret = bcm2835_init_power_domain(power, i, power_domain_names[i]); + if (ret) + goto fail; + } for (i = 0; i < ARRAY_SIZE(domain_deps); i++) { pm_genpd_add_subdomain(&power->domains[domain_deps[i].parent].base, @@ -644,12 +660,21 @@ static int bcm2835_power_probe(struct platform_device *pdev) ret = devm_reset_controller_register(dev, &power->reset); if (ret) - return ret; + goto fail; of_genpd_add_provider_onecell(dev->parent->of_node, &power->pd_xlate); dev_info(dev, "Broadcom BCM2835 power domains driver"); return 0; + +fail: + for (i = 0; i < ARRAY_SIZE(power_domain_names); i++) { + struct generic_pm_domain *dom = &power->domains[i].base; + + if (dom->name) + pm_genpd_remove(dom); + } + return ret; } static int bcm2835_power_remove(struct platform_device *pdev) -- cgit From 544e784188f1dd7c797c70b213385e67d92005b6 Mon Sep 17 00:00:00 2001 From: Helen Koike Date: Mon, 4 Mar 2019 18:48:37 -0300 Subject: ARM: dts: bcm283x: Fix hdmi hpd gpio pull Raspberry pi board model B revison 2 have the hot plug detector gpio active high (and not low as it was in the dts). Signed-off-by: Helen Koike Fixes: 49ac67e0c39c ("ARM: bcm2835: Add VC4 to the device tree.") Reviewed-by: Eric Anholt Signed-off-by: Eric Anholt --- arch/arm/boot/dts/bcm2835-rpi-b-rev2.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/bcm2835-rpi-b-rev2.dts b/arch/arm/boot/dts/bcm2835-rpi-b-rev2.dts index 5641d162dfdb..28e7513ce617 100644 --- a/arch/arm/boot/dts/bcm2835-rpi-b-rev2.dts +++ b/arch/arm/boot/dts/bcm2835-rpi-b-rev2.dts @@ -93,7 +93,7 @@ }; &hdmi { - hpd-gpios = <&gpio 46 GPIO_ACTIVE_LOW>; + hpd-gpios = <&gpio 46 GPIO_ACTIVE_HIGH>; }; &pwm { -- cgit From 6dd356d8fc0641a8f0afb6ba457fb9351eea5076 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 5 Mar 2019 09:16:29 -0800 Subject: ARC: unaligned: relax the check for gcc supporting -mno-unaligned-access Without bleeding edge gcc, kernel builds were tripping everywhere. So current gcc will generate unaligned code despite !CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS but that is something we have to live with. Signed-off-by: Vineet Gupta --- arch/arc/kernel/setup.c | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 15da43d2e416..a9c88b7e9182 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -314,18 +314,6 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) IS_AVAIL1(cpu->extn_mpy.ver, mpy_opt), IS_AVAIL1(cpu->isa.div_rem, "div_rem ")); - -#if defined(__ARC_UNALIGNED__) && !defined(CONFIG_ARC_USE_UNALIGNED_MEM_ACCESS) - /* - * gcc 7.3.1 (GNU 2018.03) onwards generate unaligned access by default - * but -mno-unaligned-access to disable that didn't work until gcc 8.2.1 - * (GNU 2019.03). So landing here implies the interim period, when - * despite Kconfig being off, gcc is generating unaligned accesses which - * could bomb later on. So better to disallow such broken builds - */ - BUILD_BUG_ON_MSG(1, "gcc doesn't support -mno-unaligned-access"); -#endif - if (cpu->bpu.ver) { n += scnprintf(buf + n, len - n, "BPU\t\t: %s%s match, cache:%d, Predict Table:%d Return stk: %d", -- cgit From cd479eccd2e057116d504852814402a1e68ead80 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 4 Mar 2019 12:33:28 +0100 Subject: s390: limit brk randomization to 32MB For a 64-bit process the randomization of the program break is quite large with 1GB. That is as big as the randomization of the anonymous mapping base, for a test case started with '/lib/ld64.so.1 ' it can happen that the heap is placed after the stack. To avoid this limit the program break randomization to 32MB for 64-bit and keep 8MB for 31-bit. Reported-by: Stefan Liebler Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/elf.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 7d22a474a040..f74639a05f0f 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -252,11 +252,14 @@ do { \ /* * Cache aliasing on the latest machines calls for a mapping granularity - * of 512KB. For 64-bit processes use a 512KB alignment and a randomization - * of up to 1GB. For 31-bit processes the virtual address space is limited, - * use no alignment and limit the randomization to 8MB. + * of 512KB for the anonymous mapping base. For 64-bit processes use a + * 512KB alignment and a randomization of up to 1GB. For 31-bit processes + * the virtual address space is limited, use no alignment and limit the + * randomization to 8MB. + * For the additional randomization of the program break use 32MB for + * 64-bit and 8MB for 31-bit. */ -#define BRK_RND_MASK (is_compat_task() ? 0x7ffUL : 0x3ffffUL) +#define BRK_RND_MASK (is_compat_task() ? 0x7ffUL : 0x1fffUL) #define MMAP_RND_MASK (is_compat_task() ? 0x7ffUL : 0x3ff80UL) #define MMAP_ALIGN_MASK (is_compat_task() ? 0 : 0x7fUL) #define STACK_RND_MASK MMAP_RND_MASK -- cgit From 01396a374c3d31bc5f8b693026cfa9a657319624 Mon Sep 17 00:00:00 2001 From: Harald Freudenberger Date: Fri, 22 Feb 2019 17:24:11 +0100 Subject: s390/zcrypt: revisit ap device remove procedure Working with the vfio-ap driver let to some revisit of the way how an ap (queue) device is removed from the driver. With the current implementation all the cleanup was done before the driver even got notified about the removal. Now the ap queue removal is done in 3 steps: 1) A preparation step, all ap messages within the queue are flushed and so the driver does 'receive' them. Also a new state AP_STATE_REMOVE assigned to the queue makes sure there are no new messages queued in. 2) Now the driver's remove function is invoked and the driver should do the job of cleaning up it's internal administration lists or whatever. After 2) is done it is guaranteed, that the driver is not invoked any more. On the other hand the driver has to make sure that the APQN is not accessed any more after step 2 is complete. 3) Now the ap bus code does the job of total cleanup of the APQN. A reset with zero is triggered and the state of the queue goes to AP_STATE_UNBOUND. After step 3) is complete, the ap queue has no pending messages and the APQN is cleared and so there are no requests and replies lingering around in the firmware queue for this APQN. Also the interrupts are disabled. After these remove steps the ap queue device may be assigned to another driver. Stress testing this remove/probe procedure showed a problem with the correct module reference counting. The actual receive of an reply in the driver is done asynchronous with completions. So with a driver change on an ap queue the message flush triggers completions but the threads waiting for the completions may run at a time where the queue already has the new driver assigned. So the module_put() at receive time needs to be done on the driver module which queued the ap message. This change is also part of this patch. Signed-off-by: Harald Freudenberger Reviewed-by: Ingo Franzki Signed-off-by: Martin Schwidefsky --- drivers/s390/crypto/ap_bus.c | 9 ++++++++- drivers/s390/crypto/ap_bus.h | 2 ++ drivers/s390/crypto/ap_queue.c | 26 +++++++++++++++++++++----- drivers/s390/crypto/zcrypt_api.c | 30 ++++++++++++++++++------------ 4 files changed, 49 insertions(+), 18 deletions(-) diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index e15816ff1265..033a1acabf48 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -810,11 +810,18 @@ static int ap_device_remove(struct device *dev) struct ap_device *ap_dev = to_ap_dev(dev); struct ap_driver *ap_drv = ap_dev->drv; + /* prepare ap queue device removal */ if (is_queue_dev(dev)) - ap_queue_remove(to_ap_queue(dev)); + ap_queue_prepare_remove(to_ap_queue(dev)); + + /* driver's chance to clean up gracefully */ if (ap_drv->remove) ap_drv->remove(ap_dev); + /* now do the ap queue device remove */ + if (is_queue_dev(dev)) + ap_queue_remove(to_ap_queue(dev)); + /* Remove queue/card from list of active queues/cards */ spin_lock_bh(&ap_list_lock); if (is_card_dev(dev)) diff --git a/drivers/s390/crypto/ap_bus.h b/drivers/s390/crypto/ap_bus.h index d0059eae5d94..15a98a673c5c 100644 --- a/drivers/s390/crypto/ap_bus.h +++ b/drivers/s390/crypto/ap_bus.h @@ -91,6 +91,7 @@ enum ap_state { AP_STATE_WORKING, AP_STATE_QUEUE_FULL, AP_STATE_SUSPEND_WAIT, + AP_STATE_REMOVE, /* about to be removed from driver */ AP_STATE_UNBOUND, /* momentary not bound to a driver */ AP_STATE_BORKED, /* broken */ NR_AP_STATES @@ -252,6 +253,7 @@ void ap_bus_force_rescan(void); void ap_queue_init_reply(struct ap_queue *aq, struct ap_message *ap_msg); struct ap_queue *ap_queue_create(ap_qid_t qid, int device_type); +void ap_queue_prepare_remove(struct ap_queue *aq); void ap_queue_remove(struct ap_queue *aq); void ap_queue_suspend(struct ap_device *ap_dev); void ap_queue_resume(struct ap_device *ap_dev); diff --git a/drivers/s390/crypto/ap_queue.c b/drivers/s390/crypto/ap_queue.c index ba261210c6da..6a340f2c3556 100644 --- a/drivers/s390/crypto/ap_queue.c +++ b/drivers/s390/crypto/ap_queue.c @@ -420,6 +420,10 @@ static ap_func_t *ap_jumptable[NR_AP_STATES][NR_AP_EVENTS] = { [AP_EVENT_POLL] = ap_sm_suspend_read, [AP_EVENT_TIMEOUT] = ap_sm_nop, }, + [AP_STATE_REMOVE] = { + [AP_EVENT_POLL] = ap_sm_nop, + [AP_EVENT_TIMEOUT] = ap_sm_nop, + }, [AP_STATE_UNBOUND] = { [AP_EVENT_POLL] = ap_sm_nop, [AP_EVENT_TIMEOUT] = ap_sm_nop, @@ -740,18 +744,31 @@ void ap_flush_queue(struct ap_queue *aq) } EXPORT_SYMBOL(ap_flush_queue); -void ap_queue_remove(struct ap_queue *aq) +void ap_queue_prepare_remove(struct ap_queue *aq) { - ap_flush_queue(aq); + spin_lock_bh(&aq->lock); + /* flush queue */ + __ap_flush_queue(aq); + /* set REMOVE state to prevent new messages are queued in */ + aq->state = AP_STATE_REMOVE; del_timer_sync(&aq->timeout); + spin_unlock_bh(&aq->lock); +} - /* reset with zero, also clears irq registration */ +void ap_queue_remove(struct ap_queue *aq) +{ + /* + * all messages have been flushed and the state is + * AP_STATE_REMOVE. Now reset with zero which also + * clears the irq registration and move the state + * to AP_STATE_UNBOUND to signal that this queue + * is not used by any driver currently. + */ spin_lock_bh(&aq->lock); ap_zapq(aq->qid); aq->state = AP_STATE_UNBOUND; spin_unlock_bh(&aq->lock); } -EXPORT_SYMBOL(ap_queue_remove); void ap_queue_reinit_state(struct ap_queue *aq) { @@ -760,4 +777,3 @@ void ap_queue_reinit_state(struct ap_queue *aq) ap_wait(ap_sm_event(aq, AP_EVENT_POLL)); spin_unlock_bh(&aq->lock); } -EXPORT_SYMBOL(ap_queue_reinit_state); diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index eb93c2d27d0a..689c2af7026a 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -586,6 +586,7 @@ static inline bool zcrypt_check_queue(struct ap_perms *perms, int queue) static inline struct zcrypt_queue *zcrypt_pick_queue(struct zcrypt_card *zc, struct zcrypt_queue *zq, + struct module **pmod, unsigned int weight) { if (!zq || !try_module_get(zq->queue->ap_dev.drv->driver.owner)) @@ -595,15 +596,15 @@ static inline struct zcrypt_queue *zcrypt_pick_queue(struct zcrypt_card *zc, atomic_add(weight, &zc->load); atomic_add(weight, &zq->load); zq->request_count++; + *pmod = zq->queue->ap_dev.drv->driver.owner; return zq; } static inline void zcrypt_drop_queue(struct zcrypt_card *zc, struct zcrypt_queue *zq, + struct module *mod, unsigned int weight) { - struct module *mod = zq->queue->ap_dev.drv->driver.owner; - zq->request_count--; atomic_sub(weight, &zc->load); atomic_sub(weight, &zq->load); @@ -653,6 +654,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, unsigned int weight, pref_weight; unsigned int func_code; int qid = 0, rc = -ENODEV; + struct module *mod; trace_s390_zcrypt_req(mex, TP_ICARSAMODEXPO); @@ -706,7 +708,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, pref_weight = weight; } } - pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight); + pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight); spin_unlock(&zcrypt_list_lock); if (!pref_zq) { @@ -718,7 +720,7 @@ static long zcrypt_rsa_modexpo(struct ap_perms *perms, rc = pref_zq->ops->rsa_modexpo(pref_zq, mex); spin_lock(&zcrypt_list_lock); - zcrypt_drop_queue(pref_zc, pref_zq, weight); + zcrypt_drop_queue(pref_zc, pref_zq, mod, weight); spin_unlock(&zcrypt_list_lock); out: @@ -735,6 +737,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, unsigned int weight, pref_weight; unsigned int func_code; int qid = 0, rc = -ENODEV; + struct module *mod; trace_s390_zcrypt_req(crt, TP_ICARSACRT); @@ -788,7 +791,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, pref_weight = weight; } } - pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight); + pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight); spin_unlock(&zcrypt_list_lock); if (!pref_zq) { @@ -800,7 +803,7 @@ static long zcrypt_rsa_crt(struct ap_perms *perms, rc = pref_zq->ops->rsa_modexpo_crt(pref_zq, crt); spin_lock(&zcrypt_list_lock); - zcrypt_drop_queue(pref_zc, pref_zq, weight); + zcrypt_drop_queue(pref_zc, pref_zq, mod, weight); spin_unlock(&zcrypt_list_lock); out: @@ -819,6 +822,7 @@ static long _zcrypt_send_cprb(struct ap_perms *perms, unsigned int func_code; unsigned short *domain; int qid = 0, rc = -ENODEV; + struct module *mod; trace_s390_zcrypt_req(xcRB, TB_ZSECSENDCPRB); @@ -865,7 +869,7 @@ static long _zcrypt_send_cprb(struct ap_perms *perms, pref_weight = weight; } } - pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight); + pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight); spin_unlock(&zcrypt_list_lock); if (!pref_zq) { @@ -881,7 +885,7 @@ static long _zcrypt_send_cprb(struct ap_perms *perms, rc = pref_zq->ops->send_cprb(pref_zq, xcRB, &ap_msg); spin_lock(&zcrypt_list_lock); - zcrypt_drop_queue(pref_zc, pref_zq, weight); + zcrypt_drop_queue(pref_zc, pref_zq, mod, weight); spin_unlock(&zcrypt_list_lock); out: @@ -932,6 +936,7 @@ static long zcrypt_send_ep11_cprb(struct ap_perms *perms, unsigned int func_code; struct ap_message ap_msg; int qid = 0, rc = -ENODEV; + struct module *mod; trace_s390_zcrypt_req(xcrb, TP_ZSENDEP11CPRB); @@ -1000,7 +1005,7 @@ static long zcrypt_send_ep11_cprb(struct ap_perms *perms, pref_weight = weight; } } - pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight); + pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight); spin_unlock(&zcrypt_list_lock); if (!pref_zq) { @@ -1012,7 +1017,7 @@ static long zcrypt_send_ep11_cprb(struct ap_perms *perms, rc = pref_zq->ops->send_ep11_cprb(pref_zq, xcrb, &ap_msg); spin_lock(&zcrypt_list_lock); - zcrypt_drop_queue(pref_zc, pref_zq, weight); + zcrypt_drop_queue(pref_zc, pref_zq, mod, weight); spin_unlock(&zcrypt_list_lock); out_free: @@ -1033,6 +1038,7 @@ static long zcrypt_rng(char *buffer) struct ap_message ap_msg; unsigned int domain; int qid = 0, rc = -ENODEV; + struct module *mod; trace_s390_zcrypt_req(buffer, TP_HWRNGCPRB); @@ -1064,7 +1070,7 @@ static long zcrypt_rng(char *buffer) pref_weight = weight; } } - pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, weight); + pref_zq = zcrypt_pick_queue(pref_zc, pref_zq, &mod, weight); spin_unlock(&zcrypt_list_lock); if (!pref_zq) { @@ -1076,7 +1082,7 @@ static long zcrypt_rng(char *buffer) rc = pref_zq->ops->rng(pref_zq, buffer, &ap_msg); spin_lock(&zcrypt_list_lock); - zcrypt_drop_queue(pref_zc, pref_zq, weight); + zcrypt_drop_queue(pref_zc, pref_zq, mod, weight); spin_unlock(&zcrypt_list_lock); out: -- cgit From 152e9b8676c6e788c6bff095c1eaae7b86df5003 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 6 Mar 2019 13:31:21 +0200 Subject: s390/vtime: steal time exponential moving average To be able to judge the current overcommitment ratio for a CPU add a lowcore field with the exponential moving average of the steal time. The average is updated every tick. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/lowcore.h | 61 +++++++++++++++++++++-------------------- arch/s390/kernel/smp.c | 3 +- arch/s390/kernel/vtime.c | 19 ++++++++----- 3 files changed, 45 insertions(+), 38 deletions(-) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index cc0947e08b6f..5b9f10b1e55d 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -91,52 +91,53 @@ struct lowcore { __u64 hardirq_timer; /* 0x02e8 */ __u64 softirq_timer; /* 0x02f0 */ __u64 steal_timer; /* 0x02f8 */ - __u64 last_update_timer; /* 0x0300 */ - __u64 last_update_clock; /* 0x0308 */ - __u64 int_clock; /* 0x0310 */ - __u64 mcck_clock; /* 0x0318 */ - __u64 clock_comparator; /* 0x0320 */ - __u64 boot_clock[2]; /* 0x0328 */ + __u64 avg_steal_timer; /* 0x0300 */ + __u64 last_update_timer; /* 0x0308 */ + __u64 last_update_clock; /* 0x0310 */ + __u64 int_clock; /* 0x0318*/ + __u64 mcck_clock; /* 0x0320 */ + __u64 clock_comparator; /* 0x0328 */ + __u64 boot_clock[2]; /* 0x0330 */ /* Current process. */ - __u64 current_task; /* 0x0338 */ - __u64 kernel_stack; /* 0x0340 */ + __u64 current_task; /* 0x0340 */ + __u64 kernel_stack; /* 0x0348 */ /* Interrupt, DAT-off and restartstack. */ - __u64 async_stack; /* 0x0348 */ - __u64 nodat_stack; /* 0x0350 */ - __u64 restart_stack; /* 0x0358 */ + __u64 async_stack; /* 0x0350 */ + __u64 nodat_stack; /* 0x0358 */ + __u64 restart_stack; /* 0x0360 */ /* Restart function and parameter. */ - __u64 restart_fn; /* 0x0360 */ - __u64 restart_data; /* 0x0368 */ - __u64 restart_source; /* 0x0370 */ + __u64 restart_fn; /* 0x0368 */ + __u64 restart_data; /* 0x0370 */ + __u64 restart_source; /* 0x0378 */ /* Address space pointer. */ - __u64 kernel_asce; /* 0x0378 */ - __u64 user_asce; /* 0x0380 */ - __u64 vdso_asce; /* 0x0388 */ + __u64 kernel_asce; /* 0x0380 */ + __u64 user_asce; /* 0x0388 */ + __u64 vdso_asce; /* 0x0390 */ /* * The lpp and current_pid fields form a * 64-bit value that is set as program * parameter with the LPP instruction. */ - __u32 lpp; /* 0x0390 */ - __u32 current_pid; /* 0x0394 */ + __u32 lpp; /* 0x0398 */ + __u32 current_pid; /* 0x039c */ /* SMP info area */ - __u32 cpu_nr; /* 0x0398 */ - __u32 softirq_pending; /* 0x039c */ - __u32 preempt_count; /* 0x03a0 */ - __u32 spinlock_lockval; /* 0x03a4 */ - __u32 spinlock_index; /* 0x03a8 */ - __u32 fpu_flags; /* 0x03ac */ - __u64 percpu_offset; /* 0x03b0 */ - __u64 vdso_per_cpu_data; /* 0x03b8 */ - __u64 machine_flags; /* 0x03c0 */ - __u64 gmap; /* 0x03c8 */ - __u8 pad_0x03d0[0x0400-0x03d0]; /* 0x03d0 */ + __u32 cpu_nr; /* 0x03a0 */ + __u32 softirq_pending; /* 0x03a4 */ + __u32 preempt_count; /* 0x03a8 */ + __u32 spinlock_lockval; /* 0x03ac */ + __u32 spinlock_index; /* 0x03b0 */ + __u32 fpu_flags; /* 0x03b4 */ + __u64 percpu_offset; /* 0x03b8 */ + __u64 vdso_per_cpu_data; /* 0x03c0 */ + __u64 machine_flags; /* 0x03c8 */ + __u64 gmap; /* 0x03d0 */ + __u8 pad_0x03d8[0x0400-0x03d8]; /* 0x03d8 */ /* br %r1 trampoline */ __u16 br_r1_trampoline; /* 0x0400 */ diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index b198ece2aad6..b8eb99685546 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -266,7 +266,8 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) lc->percpu_offset = __per_cpu_offset[cpu]; lc->kernel_asce = S390_lowcore.kernel_asce; lc->machine_flags = S390_lowcore.machine_flags; - lc->user_timer = lc->system_timer = lc->steal_timer = 0; + lc->user_timer = lc->system_timer = + lc->steal_timer = lc->avg_steal_timer = 0; __ctl_store(lc->cregs_save_area, 0, 15); save_access_regs((unsigned int *) lc->access_regs_save_area); memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 98f850e00008..a69a0911ed0e 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -124,7 +124,7 @@ static void account_system_index_scaled(struct task_struct *p, u64 cputime, */ static int do_account_vtime(struct task_struct *tsk) { - u64 timer, clock, user, guest, system, hardirq, softirq, steal; + u64 timer, clock, user, guest, system, hardirq, softirq; timer = S390_lowcore.last_update_timer; clock = S390_lowcore.last_update_clock; @@ -182,12 +182,6 @@ static int do_account_vtime(struct task_struct *tsk) if (softirq) account_system_index_scaled(tsk, softirq, CPUTIME_SOFTIRQ); - steal = S390_lowcore.steal_timer; - if ((s64) steal > 0) { - S390_lowcore.steal_timer = 0; - account_steal_time(cputime_to_nsecs(steal)); - } - return virt_timer_forward(user + guest + system + hardirq + softirq); } @@ -213,8 +207,19 @@ void vtime_task_switch(struct task_struct *prev) */ void vtime_flush(struct task_struct *tsk) { + u64 steal, avg_steal; + if (do_account_vtime(tsk)) virt_timer_expire(); + + steal = S390_lowcore.steal_timer; + avg_steal = S390_lowcore.avg_steal_timer / 2; + if ((s64) steal > 0) { + S390_lowcore.steal_timer = 0; + account_steal_time(steal); + avg_steal += steal; + } + S390_lowcore.avg_steal_timer = avg_steal; } /* -- cgit From cd44bc40a1f1eb4e259889579d599f30b1287828 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Thu, 28 Feb 2019 14:31:31 +0100 Subject: mt76: introduce q->stopped parameter Introduce mt76_queue stopped parameter in order to run ieee80211_wake_queue only when mac80211 queues have been previously stopped and avoid to disable interrupts when it is not necessary Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/dma.c | 5 ++++- drivers/net/wireless/mediatek/mt76/mt76.h | 1 + drivers/net/wireless/mediatek/mt76/tx.c | 5 ++++- drivers/net/wireless/mediatek/mt76/usb.c | 6 +++++- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 6eedc0ec7661..99e341cb1f92 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -180,7 +180,10 @@ mt76_dma_tx_cleanup(struct mt76_dev *dev, enum mt76_txq_id qid, bool flush) else mt76_dma_sync_idx(dev, q); - wake = wake && qid < IEEE80211_NUM_ACS && q->queued < q->ndesc - 8; + wake = wake && q->stopped && + qid < IEEE80211_NUM_ACS && q->queued < q->ndesc - 8; + if (wake) + q->stopped = false; if (!q->queued) wake_up(&dev->tx_wait); diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 5dfb0601f101..3152b8c73c4a 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -126,6 +126,7 @@ struct mt76_queue { int ndesc; int queued; int buf_size; + bool stopped; u8 buf_offset; u8 hw_idx; diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index 5a349fe3e576..4d38a54014e8 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -289,8 +289,11 @@ mt76_tx(struct mt76_dev *dev, struct ieee80211_sta *sta, dev->queue_ops->tx_queue_skb(dev, q, skb, wcid, sta); dev->queue_ops->kick(dev, q); - if (q->queued > q->ndesc - 8) + if (q->queued > q->ndesc - 8 && !q->stopped) { ieee80211_stop_queue(dev->hw, skb_get_queue_mapping(skb)); + q->stopped = true; + } + spin_unlock_bh(&q->lock); } EXPORT_SYMBOL_GPL(mt76_tx); diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c index ae6ada370597..4c1abd492405 100644 --- a/drivers/net/wireless/mediatek/mt76/usb.c +++ b/drivers/net/wireless/mediatek/mt76/usb.c @@ -655,7 +655,11 @@ static void mt76u_tx_tasklet(unsigned long data) spin_lock_bh(&q->lock); } mt76_txq_schedule(dev, q); - wake = i < IEEE80211_NUM_ACS && q->queued < q->ndesc - 8; + + wake = q->stopped && q->queued < q->ndesc - 8; + if (wake) + q->stopped = false; + if (!q->queued) wake_up(&dev->tx_wait); -- cgit From fc7801021733b9fbf213ae2bde5dc5e73896a9c7 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 27 Feb 2019 19:38:29 +0100 Subject: mt76: rewrite dma descriptor base and ring size on queue reset Useful in case the hardware reset clobbers these values Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/dma.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c index 99e341cb1f92..76629b98c78d 100644 --- a/drivers/net/wireless/mediatek/mt76/dma.c +++ b/drivers/net/wireless/mediatek/mt76/dma.c @@ -130,6 +130,8 @@ mt76_dma_tx_cleanup_idx(struct mt76_dev *dev, struct mt76_queue *q, int idx, static void mt76_dma_sync_idx(struct mt76_dev *dev, struct mt76_queue *q) { + iowrite32(q->desc_dma, &q->regs->desc_base); + iowrite32(q->ndesc, &q->regs->ring_size); q->head = ioread32(&q->regs->dma_idx); q->tail = q->head; iowrite32(q->head, &q->regs->cpu_idx); -- cgit From de3c2af15fce23c42407ad0a868ac47df2e7279a Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 27 Feb 2019 19:40:27 +0100 Subject: mt76: mt76x02: when setting a key, use PN from mac80211 Preparation for full device restart support Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x02_mac.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index 91ff6598eccf..8109bac5aee6 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -73,6 +73,7 @@ int mt76x02_mac_wcid_set_key(struct mt76x02_dev *dev, u8 idx, enum mt76x02_cipher_type cipher; u8 key_data[32]; u8 iv_data[8]; + u64 pn; cipher = mt76x02_mac_get_key_info(key, key_data); if (cipher == MT_CIPHER_NONE && key) @@ -85,9 +86,22 @@ int mt76x02_mac_wcid_set_key(struct mt76x02_dev *dev, u8 idx, if (key) { mt76_rmw_field(dev, MT_WCID_ATTR(idx), MT_WCID_ATTR_PAIRWISE, !!(key->flags & IEEE80211_KEY_FLAG_PAIRWISE)); + + pn = atomic64_read(&key->tx_pn); + iv_data[3] = key->keyidx << 6; - if (cipher >= MT_CIPHER_TKIP) + if (cipher >= MT_CIPHER_TKIP) { iv_data[3] |= 0x20; + put_unaligned_le32(pn >> 16, &iv_data[4]); + } + + if (cipher == MT_CIPHER_TKIP) { + iv_data[0] = (pn >> 8) & 0xff; + iv_data[1] = (iv_data[0] | 0x20) & 0x7f; + iv_data[2] = pn & 0xff; + } else if (cipher >= MT_CIPHER_AES_CCMP) { + put_unaligned_le16((pn & 0xffff), &iv_data[0]); + } } mt76_wr_copy(dev, MT_WCID_IV(idx), iv_data, sizeof(iv_data)); -- cgit From 004960423fe17dfff93753017b7081dab36c7180 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 27 Feb 2019 19:42:39 +0100 Subject: mt76: mt76x2: implement full device restart on watchdog reset Restart the firmware and re-initialize the MAC to be able to recover from more kinds of hang states Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76.h | 1 + drivers/net/wireless/mediatek/mt76/mt76x02_mac.c | 26 +++++++ drivers/net/wireless/mediatek/mt76/mt76x02_mac.h | 2 + drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c | 81 +++++++++++++++++++--- drivers/net/wireless/mediatek/mt76/mt76x02_util.c | 4 ++ drivers/net/wireless/mediatek/mt76/mt76x2/mt76x2.h | 1 + .../net/wireless/mediatek/mt76/mt76x2/pci_init.c | 2 +- .../net/wireless/mediatek/mt76/mt76x2/pci_mcu.c | 21 ++++++ drivers/net/wireless/mediatek/mt76/tx.c | 3 + 9 files changed, 132 insertions(+), 9 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index 3152b8c73c4a..cb50e96e736b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -144,6 +144,7 @@ struct mt76_mcu_ops { const struct mt76_reg_pair *rp, int len); int (*mcu_rd_rp)(struct mt76_dev *dev, u32 base, struct mt76_reg_pair *rp, int len); + int (*mcu_restart)(struct mt76_dev *dev); }; struct mt76_queue_ops { diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index 8109bac5aee6..e1e0c8da5a8c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -67,6 +67,32 @@ int mt76x02_mac_shared_key_setup(struct mt76x02_dev *dev, u8 vif_idx, } EXPORT_SYMBOL_GPL(mt76x02_mac_shared_key_setup); +void mt76x02_mac_wcid_sync_pn(struct mt76x02_dev *dev, u8 idx, + struct ieee80211_key_conf *key) +{ + enum mt76x02_cipher_type cipher; + u8 key_data[32]; + u32 iv, eiv; + u64 pn; + + cipher = mt76x02_mac_get_key_info(key, key_data); + iv = mt76_rr(dev, MT_WCID_IV(idx)); + eiv = mt76_rr(dev, MT_WCID_IV(idx) + 4); + + pn = (u64)eiv << 16; + if (cipher == MT_CIPHER_TKIP) { + pn |= (iv >> 16) & 0xff; + pn |= (iv & 0xff) << 8; + } else if (cipher >= MT_CIPHER_AES_CCMP) { + pn |= iv & 0xffff; + } else { + return; + } + + atomic64_set(&key->tx_pn, pn); +} + + int mt76x02_mac_wcid_set_key(struct mt76x02_dev *dev, u8 idx, struct ieee80211_key_conf *key) { diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h index 6b1f25d2f64c..caeeef96c42f 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.h @@ -177,6 +177,8 @@ int mt76x02_mac_shared_key_setup(struct mt76x02_dev *dev, u8 vif_idx, u8 key_idx, struct ieee80211_key_conf *key); int mt76x02_mac_wcid_set_key(struct mt76x02_dev *dev, u8 idx, struct ieee80211_key_conf *key); +void mt76x02_mac_wcid_sync_pn(struct mt76x02_dev *dev, u8 idx, + struct ieee80211_key_conf *key); void mt76x02_mac_wcid_setup(struct mt76x02_dev *dev, u8 idx, u8 vif_idx, u8 *mac); void mt76x02_mac_wcid_set_drop(struct mt76x02_dev *dev, u8 idx, bool drop); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c index 1229f19f2b02..9de015bcd4f9 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c @@ -19,6 +19,7 @@ #include #include "mt76x02.h" +#include "mt76x02_mcu.h" #include "mt76x02_trace.h" struct beacon_bc_data { @@ -418,9 +419,65 @@ static bool mt76x02_tx_hang(struct mt76x02_dev *dev) return i < 4; } +static void mt76x02_key_sync(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct ieee80211_key_conf *key, void *data) +{ + struct mt76x02_dev *dev = hw->priv; + struct mt76_wcid *wcid; + + if (!sta) + return; + + wcid = (struct mt76_wcid *) sta->drv_priv; + + if (wcid->hw_key_idx != key->keyidx || wcid->sw_iv) + return; + + mt76x02_mac_wcid_sync_pn(dev, wcid->idx, key); +} + +static void mt76x02_reset_state(struct mt76x02_dev *dev) +{ + int i; + + clear_bit(MT76_STATE_RUNNING, &dev->mt76.state); + + rcu_read_lock(); + + ieee80211_iter_keys_rcu(dev->mt76.hw, NULL, mt76x02_key_sync, NULL); + + for (i = 0; i < ARRAY_SIZE(dev->mt76.wcid); i++) { + struct mt76_wcid *wcid = rcu_dereference(dev->mt76.wcid[i]); + struct mt76x02_sta *msta; + struct ieee80211_sta *sta; + struct ieee80211_vif *vif; + void *priv; + + if (!wcid) + continue; + + priv = msta = container_of(wcid, struct mt76x02_sta, wcid); + sta = container_of(priv, struct ieee80211_sta, drv_priv); + + priv = msta->vif; + vif = container_of(priv, struct ieee80211_vif, drv_priv); + + mt76_sta_state(dev->mt76.hw, vif, sta, + IEEE80211_STA_NONE, IEEE80211_STA_NOTEXIST); + memset(msta, 0, sizeof(*msta)); + } + + rcu_read_unlock(); + + dev->vif_mask = 0; + dev->beacon_mask = 0; +} + static void mt76x02_watchdog_reset(struct mt76x02_dev *dev) { u32 mask = dev->mt76.mmio.irqmask; + bool restart = dev->mt76.mcu_ops->mcu_restart; int i; ieee80211_stop_queues(dev->mt76.hw); @@ -432,6 +489,9 @@ static void mt76x02_watchdog_reset(struct mt76x02_dev *dev) for (i = 0; i < ARRAY_SIZE(dev->mt76.napi); i++) napi_disable(&dev->mt76.napi[i]); + if (restart) + mt76x02_reset_state(dev); + mutex_lock(&dev->mt76.mutex); if (dev->beacon_mask) @@ -452,20 +512,21 @@ static void mt76x02_watchdog_reset(struct mt76x02_dev *dev) /* let fw reset DMA */ mt76_set(dev, 0x734, 0x3); + if (restart) + dev->mt76.mcu_ops->mcu_restart(&dev->mt76); + for (i = 0; i < ARRAY_SIZE(dev->mt76.q_tx); i++) mt76_queue_tx_cleanup(dev, i, true); for (i = 0; i < ARRAY_SIZE(dev->mt76.q_rx); i++) mt76_queue_rx_reset(dev, i); - mt76_wr(dev, MT_MAC_SYS_CTRL, - MT_MAC_SYS_CTRL_ENABLE_TX | MT_MAC_SYS_CTRL_ENABLE_RX); - mt76_set(dev, MT_WPDMA_GLO_CFG, - MT_WPDMA_GLO_CFG_TX_DMA_EN | MT_WPDMA_GLO_CFG_RX_DMA_EN); + mt76x02_mac_start(dev); + if (dev->ed_monitor) mt76_set(dev, MT_TXOP_CTRL_CFG, MT_TXOP_ED_CCA_EN); - if (dev->beacon_mask) + if (dev->beacon_mask && !restart) mt76_set(dev, MT_BEACON_TIME_CFG, MT_BEACON_TIME_CFG_BEACON_TX | MT_BEACON_TIME_CFG_TBTT_EN); @@ -486,9 +547,13 @@ static void mt76x02_watchdog_reset(struct mt76x02_dev *dev) napi_schedule(&dev->mt76.napi[i]); } - ieee80211_wake_queues(dev->mt76.hw); - - mt76_txq_schedule_all(&dev->mt76); + if (restart) { + mt76x02_mcu_function_select(dev, Q_SELECT, 1); + ieee80211_restart_hw(dev->mt76.hw); + } else { + ieee80211_wake_queues(dev->mt76.hw); + mt76_txq_schedule_all(&dev->mt76); + } } static void mt76x02_check_tx_hang(struct mt76x02_dev *dev) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c index a48c261b0c63..28eccb3119d1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c @@ -237,6 +237,8 @@ int mt76x02_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif, struct mt76x02_vif *mvif = (struct mt76x02_vif *)vif->drv_priv; int idx = 0; + memset(msta, 0, sizeof(*msta)); + idx = mt76_wcid_alloc(dev->mt76.wcid_mask, ARRAY_SIZE(dev->mt76.wcid)); if (idx < 0) return -ENOSPC; @@ -274,6 +276,8 @@ mt76x02_vif_init(struct mt76x02_dev *dev, struct ieee80211_vif *vif, struct mt76x02_vif *mvif = (struct mt76x02_vif *)vif->drv_priv; struct mt76_txq *mtxq; + memset(mvif, 0, sizeof(*mvif)); + mvif->idx = idx; mvif->group_wcid.idx = MT_VIF_WCID(idx); mvif->group_wcid.hw_key_idx = -1; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/mt76x2.h b/drivers/net/wireless/mediatek/mt76/mt76x2/mt76x2.h index 6c619f1c65c9..d7abe3d73bad 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/mt76x2.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/mt76x2.h @@ -71,6 +71,7 @@ int mt76x2_mcu_load_cr(struct mt76x02_dev *dev, u8 type, u8 temp_level, void mt76x2_cleanup(struct mt76x02_dev *dev); +int mt76x2_mac_reset(struct mt76x02_dev *dev, bool hard); void mt76x2_reset_wlan(struct mt76x02_dev *dev, bool enable); void mt76x2_init_txpower(struct mt76x02_dev *dev, struct ieee80211_supported_band *sband); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c index 984d9c4c2e1a..d3927a13e92e 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c @@ -77,7 +77,7 @@ mt76x2_fixup_xtal(struct mt76x02_dev *dev) } } -static int mt76x2_mac_reset(struct mt76x02_dev *dev, bool hard) +int mt76x2_mac_reset(struct mt76x02_dev *dev, bool hard) { const u8 *macaddr = dev->mt76.macaddr; u32 val; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_mcu.c index 03e24ae7f66c..605dc66ae83b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_mcu.c @@ -165,9 +165,30 @@ error: return -ENOENT; } +static int +mt76pci_mcu_restart(struct mt76_dev *mdev) +{ + struct mt76x02_dev *dev; + int ret; + + dev = container_of(mdev, struct mt76x02_dev, mt76); + + mt76x02_mcu_cleanup(dev); + mt76x2_mac_reset(dev, true); + + ret = mt76pci_load_firmware(dev); + if (ret) + return ret; + + mt76_wr(dev, MT_WPDMA_RST_IDX, ~0); + + return 0; +} + int mt76x2_mcu_init(struct mt76x02_dev *dev) { static const struct mt76_mcu_ops mt76x2_mcu_ops = { + .mcu_restart = mt76pci_mcu_restart, .mcu_send_msg = mt76x02_mcu_msg_send, }; int ret; diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index 4d38a54014e8..fc7dffe066be 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -580,6 +580,9 @@ void mt76_wake_tx_queue(struct ieee80211_hw *hw, struct ieee80211_txq *txq) struct mt76_txq *mtxq = (struct mt76_txq *) txq->drv_priv; struct mt76_queue *hwq = mtxq->hwq; + if (!test_bit(MT76_STATE_RUNNING, &dev->state)) + return; + spin_lock_bh(&hwq->lock); if (list_empty(&mtxq->list)) list_add_tail(&mtxq->list, &hwq->swq); -- cgit From 7b25d3b8e485c7721cba9c71b44d1c286e61c8e7 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 28 Feb 2019 16:11:06 +0100 Subject: mt76x02: fix hdr pointer in write txwi for USB Since we add txwi at the begining of skb->data, it no longer point to ieee80211_hdr. This breaks settings TS bit for probe response and beacons. Acked-by: Lorenzo Bianconi Signed-off-by: Stanislaw Gruszka Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c index 43f07461c8d3..6fb52b596d42 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_core.c @@ -85,8 +85,9 @@ int mt76x02u_tx_prepare_skb(struct mt76_dev *mdev, void *data, mt76x02_insert_hdr_pad(skb); - txwi = skb_push(skb, sizeof(struct mt76x02_txwi)); + txwi = (struct mt76x02_txwi *)(skb->data - sizeof(struct mt76x02_txwi)); mt76x02_mac_write_txwi(dev, txwi, skb, wcid, sta, len); + skb_push(skb, sizeof(struct mt76x02_txwi)); pid = mt76_tx_status_skb_add(mdev, wcid, skb); txwi->pktid = pid; -- cgit From 3fd0824a2f800a2870569d385917ae1102647055 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Fri, 1 Mar 2019 16:51:03 +0100 Subject: mt76: mt76x02: only update the base mac address if necessary Also update the mask first before calculating the vif index. Fixes an issue where adding back the same interfaces in a different order fails because of duplicate vif index use Fixes: 06662264ce2ad ("mt76x02: use mask for vifs") Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x02_util.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c index 28eccb3119d1..cd072ac614f7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c @@ -293,6 +293,12 @@ mt76x02_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) struct mt76x02_dev *dev = hw->priv; unsigned int idx = 0; + /* Allow to change address in HW if we create first interface. */ + if (!dev->vif_mask && + (((vif->addr[0] ^ dev->mt76.macaddr[0]) & ~GENMASK(4, 1)) || + memcmp(vif->addr + 1, dev->mt76.macaddr + 1, ETH_ALEN - 1))) + mt76x02_mac_setaddr(dev, vif->addr); + if (vif->addr[0] & BIT(1)) idx = 1 + (((dev->mt76.macaddr[0] ^ vif->addr[0]) >> 2) & 7); @@ -315,10 +321,6 @@ mt76x02_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) if (dev->vif_mask & BIT(idx)) return -EBUSY; - /* Allow to change address in HW if we create first interface. */ - if (!dev->vif_mask && !ether_addr_equal(dev->mt76.macaddr, vif->addr)) - mt76x02_mac_setaddr(dev, vif->addr); - dev->vif_mask |= BIT(idx); mt76x02_vif_init(dev, vif, idx); -- cgit From a0ac806109277bd865b1048ec521f708b195670b Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 2 Mar 2019 18:19:20 +0100 Subject: mt76: mt76x02: reduce false positives in ED/CCA tx blocking Full tx blocking (as opposed to CCA blocking) should only happen if there is a continuous non-802.11 signal above the energy detect threshold. Unfortunately the ED/CCA counter can't detect that, as it also counts 802.11 signals as busy. Similar to the vendor code, implement a learning mode that waits until the AGC gain has already been adjusted to the lowest value (due to false CCA events), and the number of false CCA events still remains high, and the blocking threshold is exceeded for more than 5 seconds. Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x02.h | 3 +++ drivers/net/wireless/mediatek/mt76/mt76x02_mac.c | 25 ++++++++++++++++++++---- drivers/net/wireless/mediatek/mt76/mt76x02_phy.c | 2 ++ 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02.h b/drivers/net/wireless/mediatek/mt76/mt76x02.h index 6915cce5def9..42cc9da68f7b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02.h @@ -51,6 +51,7 @@ struct mt76x02_calibration { u16 false_cca; s8 avg_rssi_all; s8 agc_gain_adjust; + s8 agc_lowest_gain; s8 low_gain; s8 temp_vco; @@ -114,8 +115,10 @@ struct mt76x02_dev { struct mt76x02_dfs_pattern_detector dfs_pd; /* edcca monitor */ + unsigned long ed_trigger_timeout; bool ed_tx_blocked; bool ed_monitor; + u8 ed_monitor_learning; u8 ed_trigger; u8 ed_silent; ktime_t ed_time; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c index e1e0c8da5a8c..9ed231abe916 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c @@ -960,6 +960,7 @@ void mt76x02_edcca_init(struct mt76x02_dev *dev, bool enable) } } mt76x02_edcca_tx_enable(dev, true); + dev->ed_monitor_learning = true; /* clear previous CCA timer value */ mt76_rr(dev, MT_ED_CCA_TIMER); @@ -969,6 +970,10 @@ EXPORT_SYMBOL_GPL(mt76x02_edcca_init); #define MT_EDCCA_TH 92 #define MT_EDCCA_BLOCK_TH 2 +#define MT_EDCCA_LEARN_TH 50 +#define MT_EDCCA_LEARN_CCA 180 +#define MT_EDCCA_LEARN_TIMEOUT (20 * HZ) + static void mt76x02_edcca_check(struct mt76x02_dev *dev) { ktime_t cur_time; @@ -991,11 +996,23 @@ static void mt76x02_edcca_check(struct mt76x02_dev *dev) dev->ed_trigger = 0; } - if (dev->ed_trigger > MT_EDCCA_BLOCK_TH && - !dev->ed_tx_blocked) + if (dev->cal.agc_lowest_gain && + dev->cal.false_cca > MT_EDCCA_LEARN_CCA && + dev->ed_trigger > MT_EDCCA_LEARN_TH) { + dev->ed_monitor_learning = false; + dev->ed_trigger_timeout = jiffies + 20 * HZ; + } else if (!dev->ed_monitor_learning && + time_is_after_jiffies(dev->ed_trigger_timeout)) { + dev->ed_monitor_learning = true; + mt76x02_edcca_tx_enable(dev, true); + } + + if (dev->ed_monitor_learning) + return; + + if (dev->ed_trigger > MT_EDCCA_BLOCK_TH && !dev->ed_tx_blocked) mt76x02_edcca_tx_enable(dev, false); - else if (dev->ed_silent > MT_EDCCA_BLOCK_TH && - dev->ed_tx_blocked) + else if (dev->ed_silent > MT_EDCCA_BLOCK_TH && dev->ed_tx_blocked) mt76x02_edcca_tx_enable(dev, true); } diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_phy.c b/drivers/net/wireless/mediatek/mt76/mt76x02_phy.c index a020c757ba5c..a54b63a96eae 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_phy.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_phy.c @@ -194,6 +194,8 @@ bool mt76x02_phy_adjust_vga_gain(struct mt76x02_dev *dev) ret = true; } + dev->cal.agc_lowest_gain = dev->cal.agc_gain_adjust >= limit; + return ret; } EXPORT_SYMBOL_GPL(mt76x02_phy_adjust_vga_gain); -- cgit From 7635276989a183bdb424f9e930f836b6264d54dc Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 3 Mar 2019 11:06:19 +0100 Subject: mt76: mt7603: fix tx status HT rate validation Use the correct variable in the check. Fixes an uninitialized variable warning Reported-by: Gustavo A. R. Silva Fixes: c8846e1015022 ("mt76: add driver for MT7603E and MT7628/7688") Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7603/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c index 0a0115861b51..5e31d7da96fc 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c @@ -1072,7 +1072,7 @@ out: case MT_PHY_TYPE_HT: final_rate_flags |= IEEE80211_TX_RC_MCS; final_rate &= GENMASK(5, 0); - if (i > 15) + if (final_rate > 15) return false; break; default: -- cgit From 45a042e3026824a7e910db7a4dd38fef0540b902 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 3 Mar 2019 15:10:00 +0100 Subject: mt76: mt76x2: fix external LNA gain settings Devices with external LNA need different values for AGC registers 8 and 9 Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x2/phy.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c b/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c index 1848e8ab2e21..c7e71f2ba2a7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c @@ -260,10 +260,15 @@ mt76x2_phy_set_gain_val(struct mt76x02_dev *dev) gain_val[0] = dev->cal.agc_gain_cur[0] - dev->cal.agc_gain_adjust; gain_val[1] = dev->cal.agc_gain_cur[1] - dev->cal.agc_gain_adjust; - if (dev->mt76.chandef.width >= NL80211_CHAN_WIDTH_40) + val = 0x1836 << 16; + if (!mt76x2_has_ext_lna(dev) && + dev->mt76.chandef.width >= NL80211_CHAN_WIDTH_40) val = 0x1e42 << 16; - else - val = 0x1836 << 16; + + if (mt76x2_has_ext_lna(dev) && + dev->mt76.chandef.chan->band == NL80211_BAND_2GHZ && + dev->mt76.chandef.width < NL80211_CHAN_WIDTH_40) + val = 0x0f36 << 16; val |= 0xf8; -- cgit From b8cfd87ac24273e36fbd3ecda631f3ba6566d493 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 3 Mar 2019 15:12:14 +0100 Subject: mt76: mt76x2: fix 2.4 GHz channel gain settings AGC register 35, 37 override for the low gain setting should only be done on 5 GHz. Also, 2.4 GHz needs a different value for register 35 Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x2/phy.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c b/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c index c7e71f2ba2a7..769a9b972044 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/phy.c @@ -285,6 +285,7 @@ void mt76x2_phy_update_channel_gain(struct mt76x02_dev *dev) { u8 *gain = dev->cal.agc_gain_init; u8 low_gain_delta, gain_delta; + u32 agc_35, agc_37; bool gain_change; int low_gain; u32 val; @@ -323,6 +324,16 @@ void mt76x2_phy_update_channel_gain(struct mt76x02_dev *dev) else low_gain_delta = 14; + agc_37 = 0x2121262c; + if (dev->mt76.chandef.chan->band == NL80211_BAND_2GHZ) + agc_35 = 0x11111516; + else if (low_gain == 2) + agc_35 = agc_37 = 0x08080808; + else if (dev->mt76.chandef.width == NL80211_CHAN_WIDTH_80) + agc_35 = 0x10101014; + else + agc_35 = 0x11111116; + if (low_gain == 2) { mt76_wr(dev, MT_BBP(RXO, 18), 0xf000a990); mt76_wr(dev, MT_BBP(AGC, 35), 0x08080808); @@ -331,15 +342,13 @@ void mt76x2_phy_update_channel_gain(struct mt76x02_dev *dev) dev->cal.agc_gain_adjust = 0; } else { mt76_wr(dev, MT_BBP(RXO, 18), 0xf000a991); - if (dev->mt76.chandef.width == NL80211_CHAN_WIDTH_80) - mt76_wr(dev, MT_BBP(AGC, 35), 0x10101014); - else - mt76_wr(dev, MT_BBP(AGC, 35), 0x11111116); - mt76_wr(dev, MT_BBP(AGC, 37), 0x2121262C); gain_delta = 0; dev->cal.agc_gain_adjust = low_gain_delta; } + mt76_wr(dev, MT_BBP(AGC, 35), agc_35); + mt76_wr(dev, MT_BBP(AGC, 37), agc_37); + dev->cal.agc_gain_cur[0] = gain[0] - gain_delta; dev->cal.agc_gain_cur[1] = gain[1] - gain_delta; mt76x2_phy_set_gain_val(dev); -- cgit From f25e813bf48dce541c29b12cfc19a2c25b6db915 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 3 Mar 2019 18:39:08 +0100 Subject: mt76: mt7603: clear ps filtering mode before releasing buffered frames Fixes sending them, otherwise they loop back right into the buffer Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7603/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index b10775ed92e6..8da0b8707d24 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -399,6 +399,8 @@ mt7603_release_buffered_frames(struct ieee80211_hw *hw, __skb_queue_head_init(&list); + mt7603_wtbl_set_ps(dev, msta, false); + spin_lock_bh(&dev->ps_lock); skb_queue_walk_safe(&msta->psq, skb, tmp) { if (!nframes) -- cgit From fca9615f1a436d1e9f64042cda08a34fe32ce668 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 3 Mar 2019 18:40:18 +0100 Subject: mt76: mt7603: fix up hardware queue index for PS filtered packets Make the queue index match the hardware queue on which they get sent out Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7603/dma.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c index d69e82c66ab2..494a48e023ef 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c @@ -50,6 +50,10 @@ mt7603_rx_loopback_skb(struct mt7603_dev *dev, struct sk_buff *skb) val = le32_to_cpu(txd[0]); skb_set_queue_mapping(skb, FIELD_GET(MT_TXD0_Q_IDX, val)); + val &= ~(MT_TXD0_P_IDX | MT_TXD0_Q_IDX); + val |= FIELD_PREP(MT_TXD0_Q_IDX, MT_TX_HW_QUEUE_MGMT); + txd[0] = cpu_to_le32(val); + spin_lock_bh(&dev->ps_lock); __skb_queue_tail(&msta->psq, skb); if (skb_queue_len(&msta->psq) >= 64) { -- cgit From e004b7006600258615b969f05c4d008c1d68973b Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 3 Mar 2019 18:51:35 +0100 Subject: mt76: mt7603: notify mac80211 about buffered frames in ps queue Also fix the size check for filtered powersave frames Fixes a corner case with waking up clients Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7603/dma.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c index 494a48e023ef..b3ae0aaea62a 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/dma.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/dma.c @@ -27,12 +27,16 @@ static void mt7603_rx_loopback_skb(struct mt7603_dev *dev, struct sk_buff *skb) { __le32 *txd = (__le32 *)skb->data; + struct ieee80211_hdr *hdr; + struct ieee80211_sta *sta; struct mt7603_sta *msta; struct mt76_wcid *wcid; + void *priv; int idx; u32 val; + u8 tid; - if (skb->len < sizeof(MT_TXD_SIZE) + sizeof(struct ieee80211_hdr)) + if (skb->len < MT_TXD_SIZE + sizeof(struct ieee80211_hdr)) goto free; val = le32_to_cpu(txd[1]); @@ -46,7 +50,7 @@ mt7603_rx_loopback_skb(struct mt7603_dev *dev, struct sk_buff *skb) if (!wcid) goto free; - msta = container_of(wcid, struct mt7603_sta, wcid); + priv = msta = container_of(wcid, struct mt7603_sta, wcid); val = le32_to_cpu(txd[0]); skb_set_queue_mapping(skb, FIELD_GET(MT_TXD0_Q_IDX, val)); @@ -54,6 +58,11 @@ mt7603_rx_loopback_skb(struct mt7603_dev *dev, struct sk_buff *skb) val |= FIELD_PREP(MT_TXD0_Q_IDX, MT_TX_HW_QUEUE_MGMT); txd[0] = cpu_to_le32(val); + sta = container_of(priv, struct ieee80211_sta, drv_priv); + hdr = (struct ieee80211_hdr *) &skb->data[MT_TXD_SIZE]; + tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; + ieee80211_sta_set_buffered(sta, tid, true); + spin_lock_bh(&dev->ps_lock); __skb_queue_tail(&msta->psq, skb); if (skb_queue_len(&msta->psq) >= 64) { -- cgit From b7001f46085e06a74e4677b44ac55566f66e55aa Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 3 Mar 2019 19:16:03 +0100 Subject: mt76: mt7603: clear the service period on releasing PS filtered packets These packets have no txwi entry in the ring, so tracking via tx status does not work. To prevent PS poll requests from being unanswered, end the service period right away Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7603/main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index 8da0b8707d24..ea25eff5e81c 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -416,6 +416,9 @@ mt7603_release_buffered_frames(struct ieee80211_hw *hw, } spin_unlock_bh(&dev->ps_lock); + if (!skb_queue_empty(&list)) + ieee80211_sta_eosp(sta); + mt7603_ps_tx_list(dev, &list); if (nframes) -- cgit From ffc9a7ff59a41df9a0a265b2dd04d97f2b35893a Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 3 Mar 2019 19:19:21 +0100 Subject: mt76: when releasing PS frames, end the service period if no frame was found Fixes a rare corner case if the txq dequeue attempt fails, but mac80211 still has PS buffered packets Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/tx.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c index fc7dffe066be..2585df512335 100644 --- a/drivers/net/wireless/mediatek/mt76/tx.c +++ b/drivers/net/wireless/mediatek/mt76/tx.c @@ -377,7 +377,10 @@ mt76_release_buffered_frames(struct ieee80211_hw *hw, struct ieee80211_sta *sta, if (last_skb) { mt76_queue_ps_skb(dev, sta, last_skb, true); dev->queue_ops->kick(dev, hwq); + } else { + ieee80211_sta_eosp(sta); } + spin_unlock_bh(&hwq->lock); } EXPORT_SYMBOL_GPL(mt76_release_buffered_frames); -- cgit From 643749d4a82b150afd2f87b29ca3dda885f8e384 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 3 Mar 2019 19:40:36 +0100 Subject: mt76: mt76x02: disable ED/CCA by default This feature has been reported to cause stability issues on several systems. Disable it until it has been fixed and verified. It can still be enabled through debugfs Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x02.h | 1 + .../net/wireless/mediatek/mt76/mt76x02_debugfs.c | 27 ++++++++++++++++++++++ drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c | 3 ++- 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02.h b/drivers/net/wireless/mediatek/mt76/mt76x02.h index 42cc9da68f7b..b5a36d9fb2bd 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02.h @@ -118,6 +118,7 @@ struct mt76x02_dev { unsigned long ed_trigger_timeout; bool ed_tx_blocked; bool ed_monitor; + u8 ed_monitor_enabled; u8 ed_monitor_learning; u8 ed_trigger; u8 ed_silent; diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_debugfs.c b/drivers/net/wireless/mediatek/mt76/mt76x02_debugfs.c index 7580c5c986ff..b1d6fd4861e3 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_debugfs.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_debugfs.c @@ -116,6 +116,32 @@ static int read_agc(struct seq_file *file, void *data) return 0; } +static int +mt76_edcca_set(void *data, u64 val) +{ + struct mt76x02_dev *dev = data; + enum nl80211_dfs_regions region = dev->dfs_pd.region; + + dev->ed_monitor_enabled = !!val; + dev->ed_monitor = dev->ed_monitor_enabled && + region == NL80211_DFS_ETSI; + mt76x02_edcca_init(dev, true); + + return 0; +} + +static int +mt76_edcca_get(void *data, u64 *val) +{ + struct mt76x02_dev *dev = data; + + *val = dev->ed_monitor_enabled; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(fops_edcca, mt76_edcca_get, mt76_edcca_set, + "%lld\n"); + void mt76x02_init_debugfs(struct mt76x02_dev *dev) { struct dentry *dir; @@ -127,6 +153,7 @@ void mt76x02_init_debugfs(struct mt76x02_dev *dev) debugfs_create_u8("temperature", 0400, dir, &dev->cal.temp); debugfs_create_bool("tpc", 0600, dir, &dev->enable_tpc); + debugfs_create_file("edcca", 0400, dir, dev, &fops_edcca); debugfs_create_file("ampdu_stat", 0400, dir, dev, &fops_ampdu_stat); debugfs_create_file("dfs_stats", 0400, dir, dev, &fops_dfs_stat); debugfs_create_devm_seqfile(dev->mt76.dev, "txpower", dir, diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c b/drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c index e4649103efd4..17d12d212d1b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_dfs.c @@ -885,7 +885,8 @@ mt76x02_dfs_set_domain(struct mt76x02_dev *dev, if (dfs_pd->region != region) { tasklet_disable(&dfs_pd->dfs_tasklet); - dev->ed_monitor = region == NL80211_DFS_ETSI; + dev->ed_monitor = dev->ed_monitor_enabled && + region == NL80211_DFS_ETSI; mt76x02_edcca_init(dev, true); dfs_pd->region = region; -- cgit From b126c889743513543d007ac1c5c82b61ef003133 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 4 Mar 2019 08:36:11 +0100 Subject: mt76: mt7603: set moredata flag when queueing ps-filtered packets Clients should poll for more packets afterwards Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7603/main.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/main.c b/drivers/net/wireless/mediatek/mt76/mt7603/main.c index ea25eff5e81c..cc0fe0933b2d 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/main.c @@ -5,6 +5,7 @@ #include #include #include "mt7603.h" +#include "mac.h" #include "eeprom.h" static int @@ -385,6 +386,15 @@ mt7603_sta_ps(struct mt76_dev *mdev, struct ieee80211_sta *sta, bool ps) mt7603_ps_tx_list(dev, &list); } +static void +mt7603_ps_set_more_data(struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr; + + hdr = (struct ieee80211_hdr *) &skb->data[MT_TXD_SIZE]; + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREDATA); +} + static void mt7603_release_buffered_frames(struct ieee80211_hw *hw, struct ieee80211_sta *sta, @@ -411,6 +421,7 @@ mt7603_release_buffered_frames(struct ieee80211_hw *hw, skb_set_queue_mapping(skb, MT_TXQ_PSD); __skb_unlink(skb, &msta->psq); + mt7603_ps_set_more_data(skb); __skb_queue_tail(&list, skb); nframes--; } -- cgit From 7c1b998d3483acf785c45035c0e14a62023f1edb Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 4 Mar 2019 01:10:00 +0000 Subject: mt76: fix return value check in mt76_wmac_probe() In case of error, the function devm_ioremap_resource() returns ERR_PTR() and never returns NULL. The NULL test in the return value check should be replaced with IS_ERR(). Fixes: c8846e101502 ("mt76: add driver for MT7603E and MT7628/7688") Signed-off-by: Wei Yongjun Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt7603/soc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/soc.c b/drivers/net/wireless/mediatek/mt76/mt7603/soc.c index e13fea80d970..b920be1f5718 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/soc.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/soc.c @@ -23,9 +23,9 @@ mt76_wmac_probe(struct platform_device *pdev) } mem_base = devm_ioremap_resource(&pdev->dev, res); - if (!mem_base) { + if (IS_ERR(mem_base)) { dev_err(&pdev->dev, "Failed to get memory resource\n"); - return -EINVAL; + return PTR_ERR(mem_base); } mdev = mt76_alloc_device(&pdev->dev, sizeof(*dev), &mt7603_ops, -- cgit From 411e05f4e87794332f328ca3fa201b731f023db5 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 6 Mar 2019 10:51:12 +0100 Subject: mt76x2u: remove duplicated entry in mt76x2u_device_table Remove duplicated entry in mt76x2u_device_table since Alfa AWUS036ACM and Aukey USB-AC1200 have the same ids Fixes: 62a25dc56990a ("mt76x2u: Add support for Alfa AWUS036ACM") Signed-off-by: Lorenzo Bianconi Signed-off-by: Felix Fietkau --- drivers/net/wireless/mediatek/mt76/mt76x2/usb.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c index ddb6b2c48e01..7a5d539873ca 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c @@ -21,11 +21,10 @@ #include "mt76x2u.h" static const struct usb_device_id mt76x2u_device_table[] = { - { USB_DEVICE(0x0e8d, 0x7612) }, /* Alfa AWUS036ACM */ { USB_DEVICE(0x0b05, 0x1833) }, /* Asus USB-AC54 */ { USB_DEVICE(0x0b05, 0x17eb) }, /* Asus USB-AC55 */ { USB_DEVICE(0x0b05, 0x180b) }, /* Asus USB-N53 B1 */ - { USB_DEVICE(0x0e8d, 0x7612) }, /* Aukey USB-AC1200 */ + { USB_DEVICE(0x0e8d, 0x7612) }, /* Aukey USBAC1200 - Alfa AWUS036ACM */ { USB_DEVICE(0x057c, 0x8503) }, /* Avm FRITZ!WLAN AC860 */ { USB_DEVICE(0x7392, 0xb711) }, /* Edimax EW 7722 UAC */ { USB_DEVICE(0x0846, 0x9053) }, /* Netgear A6210 */ -- cgit From 688cd8bd2c0fa9dc88e5ced55a73ddc79edf875d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Mar 2019 21:38:42 +0100 Subject: iwlwifi: fix 64-bit division do_div() expects unsigned operands and otherwise triggers a warning like: drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c:465:2: error: comparison of distinct pointer types ('typeof ((rtt_avg)) *' (aka 'long long *') and 'uint64_t *' (aka 'unsigned long long *')) [-Werror,-Wcompare-distinct-pointer-types] do_div(rtt_avg, 6666); ^~~~~~~~~~~~~~~~~~~~~ include/asm-generic/div64.h:222:28: note: expanded from macro 'do_div' (void)(((typeof((n)) *)0) == ((uint64_t *)0)); \ ~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~ 1 error generated. Change the do_div() to the simpler div_s64() that can handle negative inputs correctly. Fixes: 937b10c0de68 ("iwlwifi: mvm: add debug prints for FTM") Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo --- drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c index e9822a3ec373..94132cfd1f56 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c @@ -460,9 +460,7 @@ static int iwl_mvm_ftm_range_resp_valid(struct iwl_mvm *mvm, u8 request_id, static void iwl_mvm_debug_range_resp(struct iwl_mvm *mvm, u8 index, struct cfg80211_pmsr_result *res) { - s64 rtt_avg = res->ftm.rtt_avg * 100; - - do_div(rtt_avg, 6666); + s64 rtt_avg = div_s64(res->ftm.rtt_avg * 100, 6666); IWL_DEBUG_INFO(mvm, "entry %d\n", index); IWL_DEBUG_INFO(mvm, "\tstatus: %d\n", res->status); -- cgit From 3032f0c9008088a3effdc2622ce16c3e1bcb13a2 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 7 Mar 2019 13:29:59 -0800 Subject: ARCv2: spinlock: remove the extra smp_mb before lock, after unlock - ARCv2 LLSC spinlocks have smp_mb() both before and after the LLSC instructions, which is not required per lkmm ACQ/REL semantics. smp_mb() is only needed _after_ lock and _before_ unlock. So remove the extra barriers. The reason they were there was mainly historical. At the time of initial SMP Linux bringup on HS38 cores, I was too conservative, given the fluidity of both hw and sw. The last attempt to ditch the extra barrier showed some hackbench regression which is apparently not the case now (atleast for LLSC case, read on...) - EX based spinlocks (!CONFIG_ARC_HAS_LLSC) still needs the extra smp_mb(), not due to lkmm, but due to some hardware shenanigans. W/o that, hackbench triggers RCU stall splat so extra DMB is retained !LLSC based systems are not realistic Linux sstem anyways so they can afford to be a nit suboptimal ;-) | [ARCLinux]# for i in (seq 1 1 5) ; do hackbench; done | Running with 10 groups 400 process | INFO: task hackbench:158 blocked for more than 10 seconds. | Not tainted 4.20.0-00005-g96b18288a88e-dirty #117 | "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. | hackbench D 0 158 135 0x00000000 | | Stack Trace: | watchdog: BUG: soft lockup - CPU#3 stuck for 59s! [hackbench:469] | Modules linked in: | Path: (null) | CPU: 3 PID: 469 Comm: hackbench Not tainted 4.20.0-00005-g96b18288a88e-dirty | | [ECR ]: 0x00000000 => Check Programmer's Manual | [EFA ]: 0x00000000 | [BLINK ]: do_exit+0x4a6/0x7d0 | [ERET ]: _raw_write_unlock_irq+0x44/0x5c - And while at it, remove the extar smp_mb() from EX based arch_read_trylock() since the spin lock there guarantees a full barrier anyways - For LLSC case, hackbench threads improves with this patch (HAPS @ 50MHz) ---- before ---- | | [ARCLinux]# for i in 1 2 3 4 5; do hackbench 10 thread; done | Running with 10 groups 400 threads | Time: 16.253 | Time: 16.445 | Time: 16.590 | Time: 16.721 | Time: 16.544 ---- after ---- | | [ARCLinux]# for i in 1 2 3 4 5; do hackbench 10 thread; done | Running with 10 groups 400 threads | Time: 15.638 | Time: 15.730 | Time: 15.870 | Time: 15.842 | Time: 15.729 Acked-by: Peter Zijlstra (Intel) Signed-off-by: Vineet Gupta --- arch/arc/include/asm/spinlock.h | 49 ++++++++++++----------------------------- 1 file changed, 14 insertions(+), 35 deletions(-) diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index 2ba04a7db621..daa914da7968 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -21,8 +21,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned int val; - smp_mb(); - __asm__ __volatile__( "1: llock %[val], [%[slock]] \n" " breq %[val], %[LOCKED], 1b \n" /* spin while LOCKED */ @@ -34,6 +32,14 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) [LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__) : "memory", "cc"); + /* + * ACQUIRE barrier to ensure load/store after taking the lock + * don't "bleed-up" out of the critical section (leak-in is allowed) + * http://www.spinics.net/lists/kernel/msg2010409.html + * + * ARCv2 only has load-load, store-store and all-all barrier + * thus need the full all-all barrier + */ smp_mb(); } @@ -42,8 +48,6 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) { unsigned int val, got_it = 0; - smp_mb(); - __asm__ __volatile__( "1: llock %[val], [%[slock]] \n" " breq %[val], %[LOCKED], 4f \n" /* already LOCKED, just bail */ @@ -67,9 +71,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) { smp_mb(); - lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__; - - smp_mb(); + WRITE_ONCE(lock->slock, __ARCH_SPIN_LOCK_UNLOCKED__); } /* @@ -81,8 +83,6 @@ static inline void arch_read_lock(arch_rwlock_t *rw) { unsigned int val; - smp_mb(); - /* * zero means writer holds the lock exclusively, deny Reader. * Otherwise grant lock to first/subseq reader @@ -113,8 +113,6 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) { unsigned int val, got_it = 0; - smp_mb(); - __asm__ __volatile__( "1: llock %[val], [%[rwlock]] \n" " brls %[val], %[WR_LOCKED], 4f\n" /* <= 0: already write locked, bail */ @@ -140,8 +138,6 @@ static inline void arch_write_lock(arch_rwlock_t *rw) { unsigned int val; - smp_mb(); - /* * If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__), * deny writer. Otherwise if unlocked grant to writer @@ -175,8 +171,6 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) { unsigned int val, got_it = 0; - smp_mb(); - __asm__ __volatile__( "1: llock %[val], [%[rwlock]] \n" " brne %[val], %[UNLOCKED], 4f \n" /* !UNLOCKED, bail */ @@ -217,17 +211,13 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) : [val] "=&r" (val) : [rwlock] "r" (&(rw->counter)) : "memory", "cc"); - - smp_mb(); } static inline void arch_write_unlock(arch_rwlock_t *rw) { smp_mb(); - rw->counter = __ARCH_RW_LOCK_UNLOCKED__; - - smp_mb(); + WRITE_ONCE(rw->counter, __ARCH_RW_LOCK_UNLOCKED__); } #else /* !CONFIG_ARC_HAS_LLSC */ @@ -237,10 +227,9 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) unsigned int val = __ARCH_SPIN_LOCK_LOCKED__; /* - * This smp_mb() is technically superfluous, we only need the one - * after the lock for providing the ACQUIRE semantics. - * However doing the "right" thing was regressing hackbench - * so keeping this, pending further investigation + * Per lkmm, smp_mb() is only required after _lock (and before_unlock) + * for ACQ and REL semantics respectively. However EX based spinlocks + * need the extra smp_mb to workaround a hardware quirk. */ smp_mb(); @@ -257,14 +246,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) #endif : "memory"); - /* - * ACQUIRE barrier to ensure load/store after taking the lock - * don't "bleed-up" out of the critical section (leak-in is allowed) - * http://www.spinics.net/lists/kernel/msg2010409.html - * - * ARCv2 only has load-load, store-store and all-all barrier - * thus need the full all-all barrier - */ smp_mb(); } @@ -309,8 +290,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) : "memory"); /* - * superfluous, but keeping for now - see pairing version in - * arch_spin_lock above + * see pairing version/comment in arch_spin_lock above */ smp_mb(); } @@ -344,7 +324,6 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) arch_spin_unlock(&(rw->lock_mutex)); local_irq_restore(flags); - smp_mb(); return ret; } -- cgit From 62c9d2674b31d4c8a674bee86b7edc6da2803aea Mon Sep 17 00:00:00 2001 From: ZhangXiaoxu Date: Sat, 2 Mar 2019 09:17:32 +0800 Subject: inotify: Fix fsnotify_mark refcount leak in inotify_update_existing_watch() Commit 4d97f7d53da7dc83 ("inotify: Add flag IN_MASK_CREATE for inotify_add_watch()") forgot to call fsnotify_put_mark() with IN_MASK_CREATE after fsnotify_find_mark() Fixes: 4d97f7d53da7dc83 ("inotify: Add flag IN_MASK_CREATE for inotify_add_watch()") Signed-off-by: ZhangXiaoxu Signed-off-by: Jan Kara --- fs/notify/inotify/inotify_user.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index e2901fbb9f76..7b53598c8804 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -519,8 +519,10 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, fsn_mark = fsnotify_find_mark(&inode->i_fsnotify_marks, group); if (!fsn_mark) return -ENOENT; - else if (create) - return -EEXIST; + else if (create) { + ret = -EEXIST; + goto out; + } i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); @@ -548,6 +550,7 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, /* return the wd */ ret = i_mark->wd; +out: /* match the get from fsnotify_find_mark() */ fsnotify_put_mark(fsn_mark); -- cgit From f38a1f0a5a5710b14c0e899628c815522c6111cf Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Fri, 8 Mar 2019 15:58:20 -0800 Subject: libbpf: handle BTF parsing and loading properly This patch splits and cleans up error handling logic for loading BTF data. Previously, if BTF data was parsed successfully, but failed to load into kernel, we'd report nonsensical error code, instead of error returned from btf__load(). Now btf__new() and btf__load() are handled separately with proper cleanup and warning reporting. Fixes: d29d87f7e612 ("btf: separate btf creation and loading") Reported-by: Martin KaFai Lau Signed-off-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Acked-by: Yonghong Song Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index d5b830d60601..5e977d2688da 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -835,12 +835,19 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) obj->efile.maps_shndx = idx; else if (strcmp(name, BTF_ELF_SEC) == 0) { obj->btf = btf__new(data->d_buf, data->d_size); - if (IS_ERR(obj->btf) || btf__load(obj->btf)) { + if (IS_ERR(obj->btf)) { pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", BTF_ELF_SEC, PTR_ERR(obj->btf)); - if (!IS_ERR(obj->btf)) - btf__free(obj->btf); obj->btf = NULL; + continue; + } + err = btf__load(obj->btf); + if (err) { + pr_warning("Error loading %s into kernel: %d. Ignored and continue.\n", + BTF_ELF_SEC, err); + btf__free(obj->btf); + obj->btf = NULL; + err = 0; } } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { btf_ext_data = data; -- cgit From 50b7f1b7236bab08ebbbecf90521e84b068d7a17 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Mon, 11 Mar 2019 10:59:53 +0100 Subject: vfio: ccw: only free cp on final interrupt When we get an interrupt for a channel program, it is not necessarily the final interrupt; for example, the issuing guest may request an intermediate interrupt by specifying the program-controlled-interrupt flag on a ccw. We must not switch the state to idle if the interrupt is not yet final; even more importantly, we must not free the translated channel program if the interrupt is not yet final, or the host can crash during cp rewind. Fixes: e5f84dbaea59 ("vfio: ccw: return I/O results asynchronously") Cc: stable@vger.kernel.org # v4.12+ Reviewed-by: Eric Farman Signed-off-by: Cornelia Huck --- drivers/s390/cio/vfio_ccw_drv.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index a10cec0e86eb..0b3b9de45c60 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -72,20 +72,24 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work) { struct vfio_ccw_private *private; struct irb *irb; + bool is_final; private = container_of(work, struct vfio_ccw_private, io_work); irb = &private->irb; + is_final = !(scsw_actl(&irb->scsw) & + (SCSW_ACTL_DEVACT | SCSW_ACTL_SCHACT)); if (scsw_is_solicited(&irb->scsw)) { cp_update_scsw(&private->cp, &irb->scsw); - cp_free(&private->cp); + if (is_final) + cp_free(&private->cp); } memcpy(private->io_region->irb_area, irb, sizeof(*irb)); if (private->io_trigger) eventfd_signal(private->io_trigger, 1); - if (private->mdev) + if (private->mdev && is_final) private->state = VFIO_CCW_STATE_IDLE; } -- cgit From d9c1bb2f6a2157b38e8eb63af437cb22701d31ee Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 7 Mar 2019 10:52:33 -0800 Subject: perf/core: Restore mmap record type correctly On mmap(), perf_events generates a RECORD_MMAP record and then checks which events are interested in this record. There are currently 2 versions of mmap records: RECORD_MMAP and RECORD_MMAP2. MMAP2 is larger. The event configuration controls which version the user level tool accepts. If the event->attr.mmap2=1 field then MMAP2 record is returned. The perf_event_mmap_output() takes care of this. It checks attr->mmap2 and corrects the record fields before putting it in the sampling buffer of the event. At the end the function restores the modified MMAP record fields. The problem is that the function restores the size but not the type. Thus, if a subsequent event only accepts MMAP type, then it would instead receive an MMAP2 record with a size of MMAP record. This patch fixes the problem by restoring the record type on exit. Signed-off-by: Stephane Eranian Acked-by: Peter Zijlstra (Intel) Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Fixes: 13d7a2410fa6 ("perf: Add attr->mmap2 attribute to an event") Link: http://lkml.kernel.org/r/20190307185233.225521-1-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo --- kernel/events/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 6fb27b564730..514b8e014a2d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7189,6 +7189,7 @@ static void perf_event_mmap_output(struct perf_event *event, struct perf_output_handle handle; struct perf_sample_data sample; int size = mmap_event->event_id.header.size; + u32 type = mmap_event->event_id.header.type; int ret; if (!perf_event_mmap_match(event, data)) @@ -7232,6 +7233,7 @@ static void perf_event_mmap_output(struct perf_event *event, perf_output_end(&handle); out: mmap_event->event_id.header.size = size; + mmap_event->event_id.header.type = type; } static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) -- cgit From 3ab481a1cfe1511b94e142b648e2c5ade9175ed3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 5 Mar 2019 06:47:45 -0800 Subject: perf script: Support insn output for normal samples MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit perf script -F +insn was only working for PT traces because the PT instruction decoder was filling in the insn/insn_len sample attributes. Support it for non PT samples too on x86 using the existing x86 instruction decoder. This adds some extra checking to ensure that we don't try to decode instructions when using perf.data from a different architecture. % perf record -a sleep 1 % perf script -F ip,sym,insn --xed ffffffff811704c9 remote_function movl %eax, 0x18(%rbx) ffffffff8100bb50 intel_bts_enable_local retq ffffffff81048612 native_apic_mem_write movl %esi, -0xa04000(%rdi) ffffffff81048612 native_apic_mem_write movl %esi, -0xa04000(%rdi) ffffffff81048612 native_apic_mem_write movl %esi, -0xa04000(%rdi) ffffffff810f1f79 generic_exec_single xor %eax, %eax ffffffff811704c9 remote_function movl %eax, 0x18(%rbx) ffffffff8100bb34 intel_bts_enable_local movl 0x2000(%rax), %edx ffffffff81048610 native_apic_mem_write mov %edi, %edi ... Committer testing: Before: # perf script -F ip,sym,insn --xed | head -5 ffffffffa4068804 native_write_msr addb %al, (%rax) ffffffffa4068804 native_write_msr addb %al, (%rax) ffffffffa4068804 native_write_msr addb %al, (%rax) ffffffffa4068806 native_write_msr addb %al, (%rax) ffffffffa4068806 native_write_msr addb %al, (%rax) # perf script -F ip,sym,insn --xed | grep -v "addb %al, (%rax)" # After: # perf script -F ip,sym,insn --xed | head -5 ffffffffa4068804 native_write_msr wrmsr ffffffffa4068804 native_write_msr wrmsr ffffffffa4068804 native_write_msr wrmsr ffffffffa4068806 native_write_msr nopl %eax, (%rax,%rax,1) ffffffffa4068806 native_write_msr nopl %eax, (%rax,%rax,1) # perf script -F ip,sym,insn --xed | grep -v "addb %al, (%rax)" | head -5 ffffffffa4068804 native_write_msr wrmsr ffffffffa4068804 native_write_msr wrmsr ffffffffa4068804 native_write_msr wrmsr ffffffffa4068806 native_write_msr nopl %eax, (%rax,%rax,1) ffffffffa4068806 native_write_msr nopl %eax, (%rax,%rax,1) # More examples: # perf script -F ip,sym,insn --xed | grep -v native_write_msr | head ffffffffa416b90e tick_check_broadcast_expired btq %rax, 0x1a5f42a(%rip) ffffffffa4956bd0 nmi_cpu_backtrace pushq %r13 ffffffffa415b95e __hrtimer_next_event_base movq 0x18(%rax), %rdx ffffffffa4956bf3 nmi_cpu_backtrace popq %r12 ffffffffa4171d5c smp_call_function_single pause ffffffffa4956bdd nmi_cpu_backtrace mov %ebp, %r12d ffffffffa4797e4d menu_select cmp $0x190, %rax ffffffffa4171d5c smp_call_function_single pause ffffffffa405a7d8 nmi_cpu_backtrace_handler callq 0xffffffffa4956bd0 ffffffffa4797f7a menu_select shr $0x3, %rax # Which matches the annotate output modulo resolving callqs: # perf annotate --stdio2 nmi_cpu_backtrace_handler Samples: 4 of event 'cycles:ppp', 4000 Hz, Event count (approx.): 35908, [percent: local period] nmi_cpu_backtrace_handler() /lib/modules/5.0.0+/build/vmlinux Percent Disassembly of section .text: ffffffff8105a7d0 : nmi_cpu_backtrace_handler(): nmi_trigger_cpumask_backtrace(mask, exclude_self, nmi_raise_cpu_backtrace); } static int nmi_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs) { 24.45 → callq __fentry__ if (nmi_cpu_backtrace(regs)) mov %rsi,%rdi 75.55 → callq nmi_cpu_backtrace return NMI_HANDLED; movzbl %al,%eax return NMI_DONE; } ← retq # # perf annotate --stdio2 __hrtimer_next_event_base Samples: 4 of event 'cycles:ppp', 4000 Hz, Event count (approx.): 767977, [percent: local period] __hrtimer_next_event_base() /lib/modules/5.0.0+/build/vmlinux Percent Disassembly of section .text: ffffffff8115b910 <__hrtimer_next_event_base>: __hrtimer_next_event_base(): static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, const struct hrtimer *exclude, unsigned int active, ktime_t expires_next) { → callq __fentry__ 4a: add $0x1,%r14 77.31 mov 0x18(%rax),%rdx shl $0x6,%r14 sub 0x38(%rbx,%r14,1),%rdx if (expires < expires_next) { cmp %r12,%rdx ↓ jge 68 Signed-off-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20190305144758.12397-3-andi@firstfloor.org [ Converted fetch_exe() to use the name it ended up having when merged: thread__memcpy() ] [ archinsn.c needs the instruction decoder that is only build when CONFIG_AUXTRACE=y, fix that ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/Build | 1 + tools/perf/arch/x86/util/archinsn.c | 26 ++++++++++++++++++++++++++ tools/perf/builtin-script.c | 21 ++++++++++++++++++++- tools/perf/util/archinsn.h | 12 ++++++++++++ 4 files changed, 59 insertions(+), 1 deletion(-) create mode 100644 tools/perf/arch/x86/util/archinsn.c create mode 100644 tools/perf/util/archinsn.h diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index 7aab0be5fc5f..47f9c56e744f 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -14,5 +14,6 @@ perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-$(CONFIG_AUXTRACE) += auxtrace.o +perf-$(CONFIG_AUXTRACE) += archinsn.o perf-$(CONFIG_AUXTRACE) += intel-pt.o perf-$(CONFIG_AUXTRACE) += intel-bts.o diff --git a/tools/perf/arch/x86/util/archinsn.c b/tools/perf/arch/x86/util/archinsn.c new file mode 100644 index 000000000000..4237bb2e7fa2 --- /dev/null +++ b/tools/perf/arch/x86/util/archinsn.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "perf.h" +#include "archinsn.h" +#include "util/intel-pt-decoder/insn.h" +#include "machine.h" +#include "thread.h" +#include "symbol.h" + +void arch_fetch_insn(struct perf_sample *sample, + struct thread *thread, + struct machine *machine) +{ + struct insn insn; + int len; + bool is64bit = false; + + if (!sample->ip) + return; + len = thread__memcpy(thread, machine, sample->insn, sample->ip, sizeof(sample->insn), &is64bit); + if (len <= 0) + return; + insn_init(&insn, sample->insn, len, is64bit); + insn_get_length(&insn); + if (insn_complete(&insn) && insn.length <= len) + sample->insn_len = insn.length; +} diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 53f78cf3113f..a5080afd361d 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -29,10 +29,12 @@ #include "util/time-utils.h" #include "util/path.h" #include "print_binary.h" +#include "archinsn.h" #include #include #include #include +#include #include "asm/bug.h" #include "util/mem-events.h" #include "util/dump-insn.h" @@ -63,6 +65,7 @@ static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); static struct perf_stat_config stat_config; static int max_blocks; +static bool native_arch; unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; @@ -1227,6 +1230,12 @@ static int perf_sample__fprintf_callindent(struct perf_sample *sample, return len + dlen; } +__weak void arch_fetch_insn(struct perf_sample *sample __maybe_unused, + struct thread *thread __maybe_unused, + struct machine *machine __maybe_unused) +{ +} + static int perf_sample__fprintf_insn(struct perf_sample *sample, struct perf_event_attr *attr, struct thread *thread, @@ -1234,9 +1243,12 @@ static int perf_sample__fprintf_insn(struct perf_sample *sample, { int printed = 0; + if (sample->insn_len == 0 && native_arch) + arch_fetch_insn(sample, thread, machine); + if (PRINT_FIELD(INSNLEN)) printed += fprintf(fp, " ilen: %d", sample->insn_len); - if (PRINT_FIELD(INSN)) { + if (PRINT_FIELD(INSN) && sample->insn_len) { int i; printed += fprintf(fp, " insn:"); @@ -3277,6 +3289,7 @@ int cmd_script(int argc, const char **argv) .set = false, .default_no_sample = true, }; + struct utsname uts; char *script_path = NULL; const char **__argv; int i, j, err = 0; @@ -3615,6 +3628,12 @@ int cmd_script(int argc, const char **argv) if (symbol__init(&session->header.env) < 0) goto out_delete; + uname(&uts); + if (!strcmp(uts.machine, session->header.env.arch) || + (!strcmp(uts.machine, "x86_64") && + !strcmp(session->header.env.arch, "i386"))) + native_arch = true; + script.session = session; script__setup_sample_type(&script); diff --git a/tools/perf/util/archinsn.h b/tools/perf/util/archinsn.h new file mode 100644 index 000000000000..448cbb6b8d7e --- /dev/null +++ b/tools/perf/util/archinsn.h @@ -0,0 +1,12 @@ +#ifndef INSN_H +#define INSN_H 1 + +struct perf_sample; +struct machine; +struct thread; + +void arch_fetch_insn(struct perf_sample *sample, + struct thread *thread, + struct machine *machine); + +#endif -- cgit From 52bab8868211b7c504146f6239e101421d4d125b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 5 Mar 2019 06:47:47 -0800 Subject: perf report: Support output in nanoseconds Upcoming changes add timestamp output in perf report. Add a --ns argument similar to perf script to support nanoseconds resolution when needed. Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20190305144758.12397-5-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 3 +++ tools/perf/builtin-report.c | 1 + tools/perf/builtin-script.c | 11 +++++------ tools/perf/util/symbol.c | 1 + tools/perf/util/symbol_conf.h | 1 + 5 files changed, 11 insertions(+), 6 deletions(-) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 1a27bfe05039..51dbc519dbce 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -477,6 +477,9 @@ include::itrace.txt[] Please note that not all mmaps are stored, options affecting which ones are include 'perf record --data', for instance. +--ns:: + Show time stamps in nanoseconds. + --stats:: Display overall events statistics without any further processing. (like the one at the end of the perf report -D command) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ee93c18a6685..515864ba504a 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1147,6 +1147,7 @@ int cmd_report(int argc, const char **argv) OPT_CALLBACK(0, "percent-type", &report.annotation_opts, "local-period", "Set percent type local/global-period/hits", annotate_parse_percent_type), + OPT_BOOLEAN(0, "ns", &symbol_conf.nanosecs, "Show times in nanosecs"), OPT_END() }; struct perf_data data = { diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index a5080afd361d..111787e83784 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -60,7 +60,6 @@ static bool no_callchain; static bool latency_format; static bool system_wide; static bool print_flags; -static bool nanosecs; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); static struct perf_stat_config stat_config; @@ -691,7 +690,7 @@ static int perf_sample__fprintf_start(struct perf_sample *sample, secs = nsecs / NSEC_PER_SEC; nsecs -= secs * NSEC_PER_SEC; - if (nanosecs) + if (symbol_conf.nanosecs) printed += fprintf(fp, "%5lu.%09llu: ", secs, nsecs); else { char sample_time[32]; @@ -3244,7 +3243,7 @@ static int parse_insn_trace(const struct option *opt __maybe_unused, { parse_output_fields(NULL, "+insn,-event,-period", 0); itrace_parse_synth_opts(opt, "i0ns", 0); - nanosecs = true; + symbol_conf.nanosecs = true; return 0; } @@ -3262,7 +3261,7 @@ static int parse_call_trace(const struct option *opt __maybe_unused, { parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent", 0); itrace_parse_synth_opts(opt, "cewp", 0); - nanosecs = true; + symbol_conf.nanosecs = true; return 0; } @@ -3272,7 +3271,7 @@ static int parse_callret_trace(const struct option *opt __maybe_unused, { parse_output_fields(NULL, "-ip,-addr,-event,-period,+callindent,+flags", 0); itrace_parse_synth_opts(opt, "crewp", 0); - nanosecs = true; + symbol_conf.nanosecs = true; return 0; } @@ -3408,7 +3407,7 @@ int cmd_script(int argc, const char **argv) OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), OPT_INTEGER(0, "max-blocks", &max_blocks, "Maximum number of code blocks to dump with brstackinsn"), - OPT_BOOLEAN(0, "ns", &nanosecs, + OPT_BOOLEAN(0, "ns", &symbol_conf.nanosecs, "Use 9 decimal places when displaying time"), OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", "Instruction Tracing options\n" ITRACE_HELP, diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 758bf5f74e6e..eb873ea1c405 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -39,6 +39,7 @@ int vmlinux_path__nr_entries; char **vmlinux_path; struct symbol_conf symbol_conf = { + .nanosecs = false, .use_modules = true, .try_vmlinux_path = true, .demangle = true, diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index fffea68c1203..095a297c8b47 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -8,6 +8,7 @@ struct strlist; struct intlist; struct symbol_conf { + bool nanosecs; unsigned short priv_size; bool try_vmlinux_path, init_annotation, -- cgit From f8c856cb2c947f4fad0a2dff5e95cdcddb801303 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 5 Mar 2019 06:47:53 -0800 Subject: perf time-utils: Add utility function to print time stamps in nanoseconds Add a utility function to print nanosecond timestamps. Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20190305144758.12397-11-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/time-utils.c | 8 ++++++++ tools/perf/util/time-utils.h | 1 + 2 files changed, 9 insertions(+) diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 0f53baec660e..20663a460df3 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -453,6 +453,14 @@ int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz) return scnprintf(buf, sz, "%"PRIu64".%06"PRIu64, sec, usec); } +int timestamp__scnprintf_nsec(u64 timestamp, char *buf, size_t sz) +{ + u64 sec = timestamp / NSEC_PER_SEC, + nsec = timestamp % NSEC_PER_SEC; + + return scnprintf(buf, sz, "%" PRIu64 ".%09" PRIu64, sec, nsec); +} + int fetch_current_timestamp(char *buf, size_t sz) { struct timeval tv; diff --git a/tools/perf/util/time-utils.h b/tools/perf/util/time-utils.h index b923de44e36f..72a42ea1d513 100644 --- a/tools/perf/util/time-utils.h +++ b/tools/perf/util/time-utils.h @@ -30,6 +30,7 @@ int perf_time__parse_for_ranges(const char *str, struct perf_session *session, int *range_size, int *range_num); int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); +int timestamp__scnprintf_nsec(u64 timestamp, char *buf, size_t sz); int fetch_current_timestamp(char *buf, size_t sz); -- cgit From 2a1292cbd4e5c81edbf815a410fa2072c341db1e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 5 Mar 2019 06:47:48 -0800 Subject: perf report: Parse time quantum Many workloads change over time. 'perf report' currently aggregates the whole time range reported in perf.data. This patch adds an option for a time quantum to quantisize the perf.data over time. This just adds the option, will be used in follow on patches for a time sort key. Signed-off-by: Andi Kleen Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20190305144758.12397-6-andi@firstfloor.org [ Use NSEC_PER_[MU]SEC ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 4 +++ tools/perf/builtin-report.c | 42 ++++++++++++++++++++++++++++++++ tools/perf/util/symbol.c | 2 ++ tools/perf/util/symbol_conf.h | 1 + 4 files changed, 49 insertions(+) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 51dbc519dbce..9ec1702bccdd 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -497,6 +497,10 @@ include::itrace.txt[] The period/hits keywords set the base the percentage is computed on - the samples period or the number of samples (hits). +--time-quantum:: + Configure time quantum for time sort key. Default 100ms. + Accepts s, us, ms, ns units. + include::callchain-overhead-calculation.txt[] SEE ALSO diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 515864ba504a..05c8dd41106c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -47,9 +47,11 @@ #include #include #include +#include "sane_ctype.h" #include #include #include +#include #include #include #include @@ -926,6 +928,43 @@ report_parse_callchain_opt(const struct option *opt, const char *arg, int unset) return parse_callchain_report_opt(arg); } +static int +parse_time_quantum(const struct option *opt, const char *arg, + int unset __maybe_unused) +{ + unsigned long *time_q = opt->value; + char *end; + + *time_q = strtoul(arg, &end, 0); + if (end == arg) + goto parse_err; + if (*time_q == 0) { + pr_err("time quantum cannot be 0"); + return -1; + } + while (isspace(*end)) + end++; + if (*end == 0) + return 0; + if (!strcmp(end, "s")) { + *time_q *= NSEC_PER_SEC; + return 0; + } + if (!strcmp(end, "ms")) { + *time_q *= NSEC_PER_MSEC; + return 0; + } + if (!strcmp(end, "us")) { + *time_q *= NSEC_PER_USEC; + return 0; + } + if (!strcmp(end, "ns")) + return 0; +parse_err: + pr_err("Cannot parse time quantum `%s'\n", arg); + return -1; +} + int report_parse_ignore_callees_opt(const struct option *opt __maybe_unused, const char *arg, int unset __maybe_unused) @@ -1148,6 +1187,9 @@ int cmd_report(int argc, const char **argv) "Set percent type local/global-period/hits", annotate_parse_percent_type), OPT_BOOLEAN(0, "ns", &symbol_conf.nanosecs, "Show times in nanosecs"), + OPT_CALLBACK(0, "time-quantum", &symbol_conf.time_quantum, "time (ms|us|ns|s)", + "Set time quantum for time sort key (default 100ms)", + parse_time_quantum), OPT_END() }; struct perf_data data = { diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index eb873ea1c405..6b73a0eeb6a1 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -45,6 +46,7 @@ struct symbol_conf symbol_conf = { .demangle = true, .demangle_kernel = false, .cumulate_callchain = true, + .time_quantum = 100 * NSEC_PER_MSEC, /* 100ms */ .show_hist_headers = true, .symfs = "", .event_group = true, diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index 095a297c8b47..a5684a71b78e 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -56,6 +56,7 @@ struct symbol_conf { *sym_list_str, *col_width_list_str, *bt_stop_list_str; + unsigned long time_quantum; struct strlist *dso_list, *comm_list, *sym_list, -- cgit From eaeffeb9838a7c0dec981d258666bfcc0fa6a947 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 4 Mar 2019 15:13:21 +0200 Subject: perf probe: Fix getting the kernel map Since commit 4d99e4136580 ("perf machine: Workaround missing maps for x86 PTI entry trampolines"), perf tools has been creating more than one kernel map, however 'perf probe' assumed there could be only one. Fix by using machine__kernel_map() to get the main kernel map. Signed-off-by: Adrian Hunter Tested-by: Joseph Qi Acked-by: Masami Hiramatsu Cc: Alexander Shishkin Cc: Andy Lutomirski Cc: Greg Kroah-Hartman Cc: Jiufei Xue Cc: Peter Zijlstra Cc: stable@vger.kernel.org Cc: Xu Yu Fixes: 4d99e4136580 ("perf machine: Workaround missing maps for x86 PTI entry trampolines") Fixes: d83212d5dd67 ("kallsyms, x86: Export addresses of PTI entry trampolines") Link: http://lkml.kernel.org/r/2ed432de-e904-85d2-5c36-5897ddc5b23b@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index a1b8d9649ca7..198e09ff611e 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -160,8 +160,10 @@ static struct map *kernel_get_module_map(const char *module) if (module && strchr(module, '/')) return dso__new_map(module); - if (!module) - module = "kernel"; + if (!module) { + pos = machine__kernel_map(host_machine); + return map__get(pos); + } for (pos = maps__first(maps); pos; pos = map__next(pos)) { /* short_name is "[module]" */ -- cgit From 98c07a8f74f85a19aeee2016f5afa0c667fa694d Mon Sep 17 00:00:00 2001 From: Martin Liška Date: Wed, 13 Feb 2019 12:19:16 +0100 Subject: perf vendor events amd: perf PMU events for AMD Family 17h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thi patch adds PMC events for AMD Family 17 CPUs as defined in [1]. It covers events described in section: 2.1.13. Regex pattern in mapfile.csv covers all CPUs of the family. [1] https://support.amd.com/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf Signed-off-by: Martin Liška Acked-by: Borislav Petkov Cc: Jiri Olsa Cc: Jon Grimm Cc: Martin Jambor Cc: William Cohen Link: https://lkml.kernel.org/r/d65873ca-e402-b198-4fe9-8c4af81258c8@suse.cz Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/pmu-events/arch/x86/amdfam17h/branch.json | 12 + .../perf/pmu-events/arch/x86/amdfam17h/cache.json | 287 +++++++++++++++++++++ tools/perf/pmu-events/arch/x86/amdfam17h/core.json | 134 ++++++++++ .../arch/x86/amdfam17h/floating-point.json | 168 ++++++++++++ .../perf/pmu-events/arch/x86/amdfam17h/memory.json | 162 ++++++++++++ .../perf/pmu-events/arch/x86/amdfam17h/other.json | 65 +++++ tools/perf/pmu-events/arch/x86/mapfile.csv | 1 + 7 files changed, 829 insertions(+) create mode 100644 tools/perf/pmu-events/arch/x86/amdfam17h/branch.json create mode 100644 tools/perf/pmu-events/arch/x86/amdfam17h/cache.json create mode 100644 tools/perf/pmu-events/arch/x86/amdfam17h/core.json create mode 100644 tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json create mode 100644 tools/perf/pmu-events/arch/x86/amdfam17h/memory.json create mode 100644 tools/perf/pmu-events/arch/x86/amdfam17h/other.json diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/branch.json b/tools/perf/pmu-events/arch/x86/amdfam17h/branch.json new file mode 100644 index 000000000000..93ddfd8053ca --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdfam17h/branch.json @@ -0,0 +1,12 @@ +[ + { + "EventName": "bp_l1_btb_correct", + "EventCode": "0x8a", + "BriefDescription": "L1 BTB Correction." + }, + { + "EventName": "bp_l2_btb_correct", + "EventCode": "0x8b", + "BriefDescription": "L2 BTB Correction." + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json b/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json new file mode 100644 index 000000000000..fad4af9142cb --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json @@ -0,0 +1,287 @@ +[ + { + "EventName": "ic_fw32", + "EventCode": "0x80", + "BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)." + }, + { + "EventName": "ic_fw32_miss", + "EventCode": "0x81", + "BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag." + }, + { + "EventName": "ic_cache_fill_l2", + "EventCode": "0x82", + "BriefDescription": "The number of 64 byte instruction cache line was fulfilled from the L2 cache." + }, + { + "EventName": "ic_cache_fill_sys", + "EventCode": "0x83", + "BriefDescription": "The number of 64 byte instruction cache line fulfilled from system memory or another cache." + }, + { + "EventName": "bp_l1_tlb_miss_l2_hit", + "EventCode": "0x84", + "BriefDescription": "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB." + }, + { + "EventName": "bp_l1_tlb_miss_l2_miss", + "EventCode": "0x85", + "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs." + }, + { + "EventName": "bp_snp_re_sync", + "EventCode": "0x86", + "BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event." + }, + { + "EventName": "ic_fetch_stall.ic_stall_any", + "EventCode": "0x87", + "BriefDescription": "IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", + "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).", + "UMask": "0x4" + }, + { + "EventName": "ic_fetch_stall.ic_stall_dq_empty", + "EventCode": "0x87", + "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", + "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.", + "UMask": "0x2" + }, + { + "EventName": "ic_fetch_stall.ic_stall_back_pressure", + "EventCode": "0x87", + "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", + "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.", + "UMask": "0x1" + }, + { + "EventName": "ic_cache_inval.l2_invalidating_probe", + "EventCode": "0x8c", + "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS).", + "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to L2 invalidating probe (external or LS).", + "UMask": "0x2" + }, + { + "EventName": "ic_cache_inval.fill_invalidated", + "EventCode": "0x8c", + "BriefDescription": "IC line invalidated due to overwriting fill response.", + "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to overwriting fill response.", + "UMask": "0x1" + }, + { + "EventName": "bp_tlb_rel", + "EventCode": "0x99", + "BriefDescription": "The number of ITLB reload requests." + }, + { + "EventName": "l2_request_g1.rd_blk_l", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x80" + }, + { + "EventName": "l2_request_g1.rd_blk_x", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x40" + }, + { + "EventName": "l2_request_g1.ls_rd_blk_c_s", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x20" + }, + { + "EventName": "l2_request_g1.cacheable_ic_read", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x10" + }, + { + "EventName": "l2_request_g1.change_to_x", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x8" + }, + { + "EventName": "l2_request_g1.prefetch_l2", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x4" + }, + { + "EventName": "l2_request_g1.l2_hw_pf", + "EventCode": "0x60", + "BriefDescription": "Requests to L2 Group1.", + "PublicDescription": "Requests to L2 Group1.", + "UMask": "0x2" + }, + { + "EventName": "l2_request_g1.other_requests", + "EventCode": "0x60", + "BriefDescription": "Events covered by l2_request_g2.", + "PublicDescription": "Requests to L2 Group1. Events covered by l2_request_g2.", + "UMask": "0x1" + }, + { + "EventName": "l2_request_g2.group1", + "EventCode": "0x61", + "BriefDescription": "All Group 1 commands not in unit0.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. All Group 1 commands not in unit0.", + "UMask": "0x80" + }, + { + "EventName": "l2_request_g2.ls_rd_sized", + "EventCode": "0x61", + "BriefDescription": "RdSized, RdSized32, RdSized64.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSized, RdSized32, RdSized64.", + "UMask": "0x40" + }, + { + "EventName": "l2_request_g2.ls_rd_sized_nc", + "EventCode": "0x61", + "BriefDescription": "RdSizedNC, RdSized32NC, RdSized64NC.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSizedNC, RdSized32NC, RdSized64NC.", + "UMask": "0x20" + }, + { + "EventName": "l2_request_g2.ic_rd_sized", + "EventCode": "0x61", + "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "UMask": "0x10" + }, + { + "EventName": "l2_request_g2.ic_rd_sized_nc", + "EventCode": "0x61", + "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "UMask": "0x8" + }, + { + "EventName": "l2_request_g2.smc_inval", + "EventCode": "0x61", + "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "UMask": "0x4" + }, + { + "EventName": "l2_request_g2.bus_locks_originator", + "EventCode": "0x61", + "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "UMask": "0x2" + }, + { + "EventName": "l2_request_g2.bus_locks_responses", + "EventCode": "0x61", + "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.", + "UMask": "0x1" + }, + { + "EventName": "l2_latency.l2_cycles_waiting_on_fills", + "EventCode": "0x62", + "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", + "PublicDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.", + "UMask": "0x1" + }, + { + "EventName": "l2_wcb_req.wcb_write", + "EventCode": "0x63", + "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.", + "BriefDescription": "LS to L2 WCB write requests.", + "UMask": "0x40" + }, + { + "EventName": "l2_wcb_req.wcb_close", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB close requests.", + "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.", + "UMask": "0x20" + }, + { + "EventName": "l2_wcb_req.zero_byte_store", + "EventCode": "0x63", + "BriefDescription": "LS to L2 WCB zero byte store requests.", + "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.", + "UMask": "0x4" + }, + { + "EventName": "l2_wcb_req.cl_zero", + "EventCode": "0x63", + "PublicDescription": "LS to L2 WCB cache line zeroing requests.", + "BriefDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.", + "UMask": "0x1" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_cs", + "EventCode": "0x64", + "BriefDescription": "LS ReadBlock C/S Hit.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS ReadBlock C/S Hit.", + "UMask": "0x80" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x", + "EventCode": "0x64", + "BriefDescription": "LS Read Block L Hit X.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block L Hit X.", + "UMask": "0x40" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s", + "EventCode": "0x64", + "BriefDescription": "LsRdBlkL Hit Shared.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkL Hit Shared.", + "UMask": "0x20" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_x", + "EventCode": "0x64", + "BriefDescription": "LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.", + "UMask": "0x10" + }, + { + "EventName": "l2_cache_req_stat.ls_rd_blk_c", + "EventCode": "0x64", + "BriefDescription": "LS Read Block C S L X Change to X Miss.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block C S L X Change to X Miss.", + "UMask": "0x8" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_hit_x", + "EventCode": "0x64", + "BriefDescription": "IC Fill Hit Exclusive Stale.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Exclusive Stale.", + "UMask": "0x4" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_hit_s", + "EventCode": "0x64", + "BriefDescription": "IC Fill Hit Shared.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Shared.", + "UMask": "0x2" + }, + { + "EventName": "l2_cache_req_stat.ic_fill_miss", + "EventCode": "0x64", + "BriefDescription": "IC Fill Miss.", + "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Miss.", + "UMask": "0x1" + }, + { + "EventName": "l2_fill_pending.l2_fill_busy", + "EventCode": "0x6d", + "BriefDescription": "Total cycles spent with one or more fill requests in flight from L2.", + "PublicDescription": "Total cycles spent with one or more fill requests in flight from L2.", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/core.json b/tools/perf/pmu-events/arch/x86/amdfam17h/core.json new file mode 100644 index 000000000000..7b285b0a7f35 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdfam17h/core.json @@ -0,0 +1,134 @@ +[ + { + "EventName": "ex_ret_instr", + "EventCode": "0xc0", + "BriefDescription": "Retired Instructions." + }, + { + "EventName": "ex_ret_cops", + "EventCode": "0xc1", + "BriefDescription": "Retired Uops.", + "PublicDescription": "The number of uOps retired. This includes all processor activity (instructions, exceptions, interrupts, microcode assists, etc.). The number of events logged per cycle can vary from 0 to 4." + }, + { + "EventName": "ex_ret_brn", + "EventCode": "0xc2", + "BriefDescription": "[Retired Branch Instructions.", + "PublicDescription": "The number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts." + }, + { + "EventName": "ex_ret_brn_misp", + "EventCode": "0xc3", + "BriefDescription": "Retired Branch Instructions Mispredicted.", + "PublicDescription": "The number of branch instructions retired, of any type, that were not correctly predicted. This includes those for which prediction is not attempted (far control transfers, exceptions and interrupts)." + }, + { + "EventName": "ex_ret_brn_tkn", + "EventCode": "0xc4", + "BriefDescription": "Retired Taken Branch Instructions.", + "PublicDescription": "The number of taken branches that were retired. This includes all types of architectural control flow changes, including exceptions and interrupts." + }, + { + "EventName": "ex_ret_brn_tkn_misp", + "EventCode": "0xc5", + "BriefDescription": "Retired Taken Branch Instructions Mispredicted.", + "PublicDescription": "The number of retired taken branch instructions that were mispredicted." + }, + { + "EventName": "ex_ret_brn_far", + "EventCode": "0xc6", + "BriefDescription": "Retired Far Control Transfers.", + "PublicDescription": "The number of far control transfers retired including far call/jump/return, IRET, SYSCALL and SYSRET, plus exceptions and interrupts. Far control transfers are not subject to branch prediction." + }, + { + "EventName": "ex_ret_brn_resync", + "EventCode": "0xc7", + "BriefDescription": "Retired Branch Resyncs.", + "PublicDescription": "The number of resync branches. These reflect pipeline restarts due to certain microcode assists and events such as writes to the active instruction stream, among other things. Each occurrence reflects a restart penalty similar to a branch mispredict. This is relatively rare." + }, + { + "EventName": "ex_ret_near_ret", + "EventCode": "0xc8", + "BriefDescription": "Retired Near Returns.", + "PublicDescription": "The number of near return instructions (RET or RET Iw) retired." + }, + { + "EventName": "ex_ret_near_ret_mispred", + "EventCode": "0xc9", + "BriefDescription": "Retired Near Returns Mispredicted.", + "PublicDescription": "The number of near returns retired that were not correctly predicted by the return address predictor. Each such mispredict incurs the same penalty as a mispredicted conditional branch instruction." + }, + { + "EventName": "ex_ret_brn_ind_misp", + "EventCode": "0xca", + "BriefDescription": "Retired Indirect Branch Instructions Mispredicted.", + "PublicDescription": "Retired Indirect Branch Instructions Mispredicted." + }, + { + "EventName": "ex_ret_mmx_fp_instr.sse_instr", + "EventCode": "0xcb", + "BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).", + "UMask": "0x4" + }, + { + "EventName": "ex_ret_mmx_fp_instr.mmx_instr", + "EventCode": "0xcb", + "BriefDescription": "MMX instructions.", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.", + "UMask": "0x2" + }, + { + "EventName": "ex_ret_mmx_fp_instr.x87_instr", + "EventCode": "0xcb", + "BriefDescription": "x87 instructions.", + "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.", + "UMask": "0x1" + }, + { + "EventName": "ex_ret_cond", + "EventCode": "0xd1", + "BriefDescription": "Retired Conditional Branch Instructions." + }, + { + "EventName": "ex_ret_cond_misp", + "EventCode": "0xd2", + "BriefDescription": "Retired Conditional Branch Instructions Mispredicted." + }, + { + "EventName": "ex_div_busy", + "EventCode": "0xd3", + "BriefDescription": "Div Cycles Busy count." + }, + { + "EventName": "ex_div_count", + "EventCode": "0xd4", + "BriefDescription": "Div Op Count." + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_count_rollover", + "EventCode": "0x1cf", + "BriefDescription": "Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", + "PublicDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.", + "UMask": "0x4" + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret", + "EventCode": "0x1cf", + "BriefDescription": "Number of Ops tagged by IBS that retired.", + "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.", + "UMask": "0x2" + }, + { + "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops", + "EventCode": "0x1cf", + "BriefDescription": "Number of Ops tagged by IBS.", + "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.", + "UMask": "0x1" + }, + { + "EventName": "ex_ret_fus_brnch_inst", + "EventCode": "0x1d0", + "BriefDescription": "The number of fused retired branch instructions retired per cycle. The number of events logged per cycle can vary from 0 to 3." + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json b/tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json new file mode 100644 index 000000000000..ea4711983d1d --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json @@ -0,0 +1,168 @@ +[ + { + "EventName": "fpu_pipe_assignment.dual", + "EventCode": "0x00", + "BriefDescription": "Total number multi-pipe uOps.", + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to Pipe 3.", + "UMask": "0xf0" + }, + { + "EventName": "fpu_pipe_assignment.total", + "EventCode": "0x00", + "BriefDescription": "Total number uOps.", + "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to Pipe 3.", + "UMask": "0xf" + }, + { + "EventName": "fp_sched_empty", + "EventCode": "0x01", + "BriefDescription": "This is a speculative event. The number of cycles in which the FPU scheduler is empty. Note that some Ops like FP loads bypass the scheduler." + }, + { + "EventName": "fp_retx87_fp_ops.all", + "EventCode": "0x02", + "BriefDescription": "All Ops.", + "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8.", + "UMask": "0x7" + }, + { + "EventName": "fp_retx87_fp_ops.div_sqr_r_ops", + "EventCode": "0x02", + "BriefDescription": "Divide and square root Ops.", + "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Divide and square root Ops.", + "UMask": "0x4" + }, + { + "EventName": "fp_retx87_fp_ops.mul_ops", + "EventCode": "0x02", + "BriefDescription": "Multiply Ops.", + "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Multiply Ops.", + "UMask": "0x2" + }, + { + "EventName": "fp_retx87_fp_ops.add_sub_ops", + "EventCode": "0x02", + "BriefDescription": "Add/subtract Ops.", + "PublicDescription": "The number of x87 floating-point Ops that have retired. The number of events logged per cycle can vary from 0 to 8. Add/subtract Ops.", + "UMask": "0x1" + }, + { + "EventName": "fp_ret_sse_avx_ops.all", + "EventCode": "0x03", + "BriefDescription": "All FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.", + "UMask": "0xff" + }, + { + "EventName": "fp_ret_sse_avx_ops.dp_mult_add_flops", + "EventCode": "0x03", + "BriefDescription": "Double precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", + "UMask": "0x80" + }, + { + "EventName": "fp_ret_sse_avx_ops.dp_div_flops", + "EventCode": "0x03", + "BriefDescription": "Double precision divide/square root FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision divide/square root FLOPS.", + "UMask": "0x40" + }, + { + "EventName": "fp_ret_sse_avx_ops.dp_mult_flops", + "EventCode": "0x03", + "BriefDescription": "Double precision multiply FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision multiply FLOPS.", + "UMask": "0x20" + }, + { + "EventName": "fp_ret_sse_avx_ops.dp_add_sub_flops", + "EventCode": "0x03", + "BriefDescription": "Double precision add/subtract FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Double precision add/subtract FLOPS.", + "UMask": "0x10" + }, + { + "EventName": "fp_ret_sse_avx_ops.sp_mult_add_flops", + "EventCode": "0x03", + "BriefDescription": "Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single precision multiply-add FLOPS. Multiply-add counts as 2 FLOPS.", + "UMask": "0x8" + }, + { + "EventName": "fp_ret_sse_avx_ops.sp_div_flops", + "EventCode": "0x03", + "BriefDescription": "Single-precision divide/square root FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision divide/square root FLOPS.", + "UMask": "0x4" + }, + { + "EventName": "fp_ret_sse_avx_ops.sp_mult_flops", + "EventCode": "0x03", + "BriefDescription": "Single-precision multiply FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision multiply FLOPS.", + "UMask": "0x2" + }, + { + "EventName": "fp_ret_sse_avx_ops.sp_add_sub_flops", + "EventCode": "0x03", + "BriefDescription": "Single-precision add/subtract FLOPS.", + "PublicDescription": "This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15. Single-precision add/subtract FLOPS.", + "UMask": "0x1" + }, + { + "EventName": "fp_num_mov_elim_scal_op.optimized", + "EventCode": "0x04", + "BriefDescription": "Number of Scalar Ops optimized.", + "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of Scalar Ops optimized.", + "UMask": "0x8" + }, + { + "EventName": "fp_num_mov_elim_scal_op.opt_potential", + "EventCode": "0x04", + "BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass).", + "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of Ops that are candidates for optimization (have Z-bit either set or pass).", + "UMask": "0x4" + }, + { + "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim", + "EventCode": "0x04", + "BriefDescription": "Number of SSE Move Ops eliminated.", + "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of SSE Move Ops eliminated.", + "UMask": "0x2" + }, + { + "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops", + "EventCode": "0x04", + "BriefDescription": "Number of SSE Move Ops.", + "PublicDescription": "This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes. Number of SSE Move Ops.", + "UMask": "0x1" + }, + { + "EventName": "fp_retired_ser_ops.x87_ctrl_ret", + "EventCode": "0x05", + "BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.", + "PublicDescription": "The number of serializing Ops retired. x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits.", + "UMask": "0x8" + }, + { + "EventName": "fp_retired_ser_ops.x87_bot_ret", + "EventCode": "0x05", + "BriefDescription": "x87 bottom-executing uOps retired.", + "PublicDescription": "The number of serializing Ops retired. x87 bottom-executing uOps retired.", + "UMask": "0x4" + }, + { + "EventName": "fp_retired_ser_ops.sse_ctrl_ret", + "EventCode": "0x05", + "BriefDescription": "SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", + "PublicDescription": "The number of serializing Ops retired. SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.", + "UMask": "0x2" + }, + { + "EventName": "fp_retired_ser_ops.sse_bot_ret", + "EventCode": "0x05", + "BriefDescription": "SSE bottom-executing uOps retired.", + "PublicDescription": "The number of serializing Ops retired. SSE bottom-executing uOps retired.", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/memory.json b/tools/perf/pmu-events/arch/x86/amdfam17h/memory.json new file mode 100644 index 000000000000..fa2d60d4def0 --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdfam17h/memory.json @@ -0,0 +1,162 @@ +[ + { + "EventName": "ls_locks.bus_lock", + "EventCode": "0x25", + "BriefDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.", + "PublicDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.", + "UMask": "0x1" + }, + { + "EventName": "ls_dispatch.ld_st_dispatch", + "EventCode": "0x29", + "BriefDescription": "Load-op-Stores.", + "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed. Load-op-Stores.", + "UMask": "0x4" + }, + { + "EventName": "ls_dispatch.store_dispatch", + "EventCode": "0x29", + "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "UMask": "0x2" + }, + { + "EventName": "ls_dispatch.ld_dispatch", + "EventCode": "0x29", + "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.", + "UMask": "0x1" + }, + { + "EventName": "ls_stlf", + "EventCode": "0x35", + "BriefDescription": "Number of STLF hits." + }, + { + "EventName": "ls_dc_accesses", + "EventCode": "0x40", + "BriefDescription": "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. This event is a speculative event." + }, + { + "EventName": "ls_l1_d_tlb_miss.all", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss or Reload off all sizes.", + "PublicDescription": "L1 DTLB Miss or Reload off all sizes.", + "UMask": "0xff" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss of a page of 1G size.", + "PublicDescription": "L1 DTLB Miss of a page of 1G size.", + "UMask": "0x80" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss of a page of 2M size.", + "PublicDescription": "L1 DTLB Miss of a page of 2M size.", + "UMask": "0x40" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss of a page of 32K size.", + "PublicDescription": "L1 DTLB Miss of a page of 32K size.", + "UMask": "0x20" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Miss of a page of 4K size.", + "PublicDescription": "L1 DTLB Miss of a page of 4K size.", + "UMask": "0x10" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Reload of a page of 1G size.", + "PublicDescription": "L1 DTLB Reload of a page of 1G size.", + "UMask": "0x8" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Reload of a page of 2M size.", + "PublicDescription": "L1 DTLB Reload of a page of 2M size.", + "UMask": "0x4" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Reload of a page of 32K size.", + "PublicDescription": "L1 DTLB Reload of a page of 32K size.", + "UMask": "0x2" + }, + { + "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit", + "EventCode": "0x45", + "BriefDescription": "L1 DTLB Reload of a page of 4K size.", + "PublicDescription": "L1 DTLB Reload of a page of 4K size.", + "UMask": "0x1" + }, + { + "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_iside", + "EventCode": "0x46", + "BriefDescription": "Tablewalker allocation.", + "PublicDescription": "Tablewalker allocation.", + "UMask": "0xc" + }, + { + "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_dside", + "EventCode": "0x46", + "BriefDescription": "Tablewalker allocation.", + "PublicDescription": "Tablewalker allocation.", + "UMask": "0x3" + }, + { + "EventName": "ls_misal_accesses", + "EventCode": "0x47", + "BriefDescription": "Misaligned loads." + }, + { + "EventName": "ls_pref_instr_disp.prefetch_nta", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.", + "PublicDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.", + "UMask": "0x4" + }, + { + "EventName": "ls_pref_instr_disp.store_prefetch_w", + "EventCode": "0x4b", + "BriefDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.", + "PublicDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.", + "UMask": "0x2" + }, + { + "EventName": "ls_pref_instr_disp.load_prefetch_w", + "EventCode": "0x4b", + "BriefDescription": "Prefetch, Prefetch_T0_T1_T2.", + "PublicDescription": "Software Prefetch Instructions Dispatched. Prefetch, Prefetch_T0_T1_T2.", + "UMask": "0x1" + }, + { + "EventName": "ls_inef_sw_pref.mab_mch_cnt", + "EventCode": "0x52", + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.", + "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.", + "UMask": "0x2" + }, + { + "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit", + "EventCode": "0x52", + "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.", + "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.", + "UMask": "0x1" + }, + { + "EventName": "ls_not_halted_cyc", + "EventCode": "0x76", + "BriefDescription": "Cycles not in Halt." + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/other.json b/tools/perf/pmu-events/arch/x86/amdfam17h/other.json new file mode 100644 index 000000000000..b26a00d05a2e --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdfam17h/other.json @@ -0,0 +1,65 @@ +[ + { + "EventName": "ic_oc_mode_switch.oc_ic_mode_switch", + "EventCode": "0x28a", + "BriefDescription": "OC to IC mode switch.", + "PublicDescription": "OC Mode Switch. OC to IC mode switch.", + "UMask": "0x2" + }, + { + "EventName": "ic_oc_mode_switch.ic_oc_mode_switch", + "EventCode": "0x28a", + "BriefDescription": "IC to OC mode switch.", + "PublicDescription": "OC Mode Switch. IC to OC mode switch.", + "UMask": "0x1" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall", + "EventCode": "0xaf", + "BriefDescription": "RETIRE Tokens unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. RETIRE Tokens unavailable.", + "UMask": "0x40" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.agsq_token_stall", + "EventCode": "0xaf", + "BriefDescription": "AGSQ Tokens unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.", + "UMask": "0x20" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall", + "EventCode": "0xaf", + "BriefDescription": "ALU tokens total unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.", + "UMask": "0x10" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall", + "EventCode": "0xaf", + "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.", + "UMask": "0x8" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alsq3_token_stall", + "EventCode": "0xaf", + "BriefDescription": "ALSQ 3 Tokens unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3 Tokens unavailable.", + "UMask": "0x4" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall", + "EventCode": "0xaf", + "BriefDescription": "ALSQ 2 Tokens unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.", + "UMask": "0x2" + }, + { + "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall", + "EventCode": "0xaf", + "BriefDescription": "ALSQ 1 Tokens unavailable.", + "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.", + "UMask": "0x1" + } +] diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index e05c2c8458fc..d6984a3017e0 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -33,3 +33,4 @@ GenuineIntel-6-25,v2,westmereep-sp,core GenuineIntel-6-2F,v2,westmereex,core GenuineIntel-6-55-[01234],v1,skylakex,core GenuineIntel-6-55-[56789ABCDEF],v1,cascadelakex,core +AuthenticAMD-23-[[:xdigit:]]+,v1,amdfam17h,core -- cgit From ec65def1045e4c7817b7f741a86dadae82877a93 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 8 Mar 2019 14:47:35 +0100 Subject: perf data: Support having perf.data stored as a directory The caller needs to set 'struct perf_data::is_dir flag and the path will be treated as a directory. The 'struct perf_data::file' is initialized and open as 'path/header' file. Add a check to the direcory interface functions to check the is_dir flag. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20190308134745.5057-2-jolsa@kernel.org [ Be consistent on how to signal failure, i.e. use -1 and let users check errno ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++- tools/perf/util/data.h | 6 ++++++ tools/perf/util/session.c | 4 ++++ 3 files changed, 58 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index e098e189f93e..18fa8d4614eb 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -34,6 +34,9 @@ int perf_data__create_dir(struct perf_data *data, int nr) struct perf_data_file *files = NULL; int i, ret = -1; + if (WARN_ON(!data->is_dir)) + return -EINVAL; + files = zalloc(nr * sizeof(*files)); if (!files) return -ENOMEM; @@ -69,6 +72,9 @@ int perf_data__open_dir(struct perf_data *data) DIR *dir; int nr = 0; + if (WARN_ON(!data->is_dir)) + return -EINVAL; + dir = opendir(data->path); if (!dir) return -EINVAL; @@ -173,6 +179,16 @@ static int check_backup(struct perf_data *data) return 0; } +static bool is_dir(struct perf_data *data) +{ + struct stat st; + + if (stat(data->path, &st)) + return false; + + return (st.st_mode & S_IFMT) == S_IFDIR; +} + static int open_file_read(struct perf_data *data) { struct stat st; @@ -254,6 +270,30 @@ static int open_file_dup(struct perf_data *data) return open_file(data); } +static int open_dir(struct perf_data *data) +{ + int ret; + + /* + * So far we open only the header, so we can read the data version and + * layout. + */ + if (asprintf(&data->file.path, "%s/header", data->path) < 0) + return -1; + + if (perf_data__is_write(data) && + mkdir(data->path, S_IRWXU) < 0) + return -1; + + ret = open_file(data); + + /* Cleanup whatever we managed to create so far. */ + if (ret && perf_data__is_write(data)) + rm_rf_perf_data(data->path); + + return ret; +} + int perf_data__open(struct perf_data *data) { if (check_pipe(data)) @@ -265,11 +305,18 @@ int perf_data__open(struct perf_data *data) if (check_backup(data)) return -1; - return open_file_dup(data); + if (perf_data__is_read(data)) + data->is_dir = is_dir(data); + + return perf_data__is_dir(data) ? + open_dir(data) : open_file_dup(data); } void perf_data__close(struct perf_data *data) { + if (perf_data__is_dir(data)) + perf_data__close_dir(data); + zfree(&data->file.path); close(data->file.fd); } diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index 14b47be2bd69..06aefeda311f 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -19,6 +19,7 @@ struct perf_data { const char *path; struct perf_data_file file; bool is_pipe; + bool is_dir; bool force; enum perf_data_mode mode; @@ -43,6 +44,11 @@ static inline int perf_data__is_pipe(struct perf_data *data) return data->is_pipe; } +static inline bool perf_data__is_dir(struct perf_data *data) +{ + return data->is_dir; +} + static inline int perf_data__fd(struct perf_data *data) { return data->file.fd; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index db643f3c2b95..de777bdc0ed3 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -152,6 +152,10 @@ struct perf_session *perf_session__new(struct perf_data *data, } perf_evlist__init_trace_event_sample_raw(session->evlist); + + /* Open the directory data. */ + if (data->is_dir && perf_data__open_dir(data)) + goto out_delete; } } else { session->machines.host.env = &perf_env; -- cgit From cd3dd8dd8ff62374d90cb3f2e54b8c94106c7810 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 8 Mar 2019 14:47:36 +0100 Subject: perf data: Don't store auxtrace index for directory data file We can't store the auxtrace index when we store into multiple files, because we keep only offset for it, not the file. The auxtrace data will be processed correctly in the 'pipe' mode. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20190308134745.5057-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f3f7f3100336..e983c8d71a79 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -392,7 +392,7 @@ static int record__process_auxtrace(struct perf_tool *tool, size_t padding; u8 pad[8] = {0}; - if (!perf_data__is_pipe(data)) { + if (!perf_data__is_pipe(data) && !perf_data__is_dir(data)) { off_t file_offset; int fd = perf_data__fd(data); int err; -- cgit From e8be135751f26aa5de63e517d375ecf69e9b20c3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 8 Mar 2019 14:47:37 +0100 Subject: perf data: Add perf_data__update_dir() function Add perf_data__update_dir() to update the size for every file within the perf.data directory. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20190308134745.5057-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data.c | 20 ++++++++++++++++++++ tools/perf/util/data.h | 1 + 2 files changed, 21 insertions(+) diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 18fa8d4614eb..5d0f26aef77f 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -124,6 +124,26 @@ out_err: return ret; } +int perf_data__update_dir(struct perf_data *data) +{ + int i; + + if (WARN_ON(!data->is_dir)) + return -EINVAL; + + for (i = 0; i < data->dir.nr; i++) { + struct perf_data_file *file = &data->dir.files[i]; + struct stat st; + + if (fstat(file->fd, &st)) + return -1; + + file->size = st.st_size; + } + + return 0; +} + static bool check_pipe(struct perf_data *data) { struct stat st; diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index 06aefeda311f..0deeb1af9f54 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -79,4 +79,5 @@ int perf_data__switch(struct perf_data *data, int perf_data__create_dir(struct perf_data *data, int nr); int perf_data__open_dir(struct perf_data *data); void perf_data__close_dir(struct perf_data *data); +int perf_data__update_dir(struct perf_data *data); #endif /* __PERF_DATA_H */ -- cgit From 29583c17b5ce0bc17fdd80da939f8199a03d9668 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 8 Mar 2019 14:47:38 +0100 Subject: perf data: Make perf_data__size() work over directory Make perf_data__size() return proper size for directory data, summing up all the individual file sizes. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20190308134745.5057-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data.c | 17 +++++++++++++++++ tools/perf/util/data.h | 6 +----- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 5d0f26aef77f..b71c441cafbb 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -393,3 +393,20 @@ out: free(new_filepath); return ret; } + +unsigned long perf_data__size(struct perf_data *data) +{ + u64 size = data->file.size; + int i; + + if (!data->is_dir) + return size; + + for (i = 0; i < data->dir.nr; i++) { + struct perf_data_file *file = &data->dir.files[i]; + + size += file->size; + } + + return size; +} diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index 0deeb1af9f54..d342469bdfda 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -54,11 +54,6 @@ static inline int perf_data__fd(struct perf_data *data) return data->file.fd; } -static inline unsigned long perf_data__size(struct perf_data *data) -{ - return data->file.size; -} - int perf_data__open(struct perf_data *data); void perf_data__close(struct perf_data *data); ssize_t perf_data__write(struct perf_data *data, @@ -80,4 +75,5 @@ int perf_data__create_dir(struct perf_data *data, int nr); int perf_data__open_dir(struct perf_data *data); void perf_data__close_dir(struct perf_data *data); int perf_data__update_dir(struct perf_data *data); +unsigned long perf_data__size(struct perf_data *data); #endif /* __PERF_DATA_H */ -- cgit From 258031c017c353e899902342a25579fc81a34cc1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 8 Mar 2019 14:47:39 +0100 Subject: perf header: Add DIR_FORMAT feature to describe directory data The data files layout is described by HEADER_DIR_FORMAT feature. Currently it holds only version number (1): uint64_t version; The current version holds only version value (1) means that data files: - Follow the 'data.*' name format. - Contain raw events data in standard perf format as read from kernel (and need to be sorted) Future versions are expected to describe different data files layout according to special needs. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20190308134745.5057-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 ++ tools/perf/util/data.c | 10 ++++++++-- tools/perf/util/data.h | 1 + tools/perf/util/header.c | 44 +++++++++++++++++++++++++++++++++++++++++++- tools/perf/util/header.h | 5 +++++ 5 files changed, 59 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index e983c8d71a79..a468d882e74f 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -837,6 +837,8 @@ static void record__init_features(struct record *rec) if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns)) perf_header__clear_feat(&session->header, HEADER_CLOCKID); + perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT); + perf_header__clear_feat(&session->header, HEADER_STAT); } diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index b71c441cafbb..c6b67efea11a 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -14,6 +14,7 @@ #include "data.h" #include "util.h" #include "debug.h" +#include "header.h" static void close_dir(struct perf_data_file *files, int nr) { @@ -41,8 +42,9 @@ int perf_data__create_dir(struct perf_data *data, int nr) if (!files) return -ENOMEM; - data->dir.files = files; - data->dir.nr = nr; + data->dir.version = PERF_DIR_VERSION; + data->dir.files = files; + data->dir.nr = nr; for (i = 0; i < nr; i++) { struct perf_data_file *file = &files[i]; @@ -75,6 +77,10 @@ int perf_data__open_dir(struct perf_data *data) if (WARN_ON(!data->is_dir)) return -EINVAL; + /* The version is provided by DIR_FORMAT feature. */ + if (WARN_ON(data->dir.version != PERF_DIR_VERSION)) + return -1; + dir = opendir(data->path); if (!dir) return -EINVAL; diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index d342469bdfda..6aef8746469f 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -24,6 +24,7 @@ struct perf_data { enum perf_data_mode mode; struct { + u64 version; struct perf_data_file *files; int nr; } dir; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 01b324c275b9..b0683bf4d9f3 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -861,6 +861,21 @@ static int write_clockid(struct feat_fd *ff, sizeof(ff->ph->env.clockid_res_ns)); } +static int write_dir_format(struct feat_fd *ff, + struct perf_evlist *evlist __maybe_unused) +{ + struct perf_session *session; + struct perf_data *data; + + session = container_of(ff->ph, struct perf_session, header); + data = session->data; + + if (WARN_ON(!perf_data__is_dir(data))) + return -1; + + return do_write(ff, &data->dir.version, sizeof(data->dir.version)); +} + static int cpu_cache_level__sort(const void *a, const void *b) { struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a; @@ -1341,6 +1356,17 @@ static void print_clockid(struct feat_fd *ff, FILE *fp) ff->ph->env.clockid_res_ns * 1000); } +static void print_dir_format(struct feat_fd *ff, FILE *fp) +{ + struct perf_session *session; + struct perf_data *data; + + session = container_of(ff->ph, struct perf_session, header); + data = session->data; + + fprintf(fp, "# directory data version : %"PRIu64"\n", data->dir.version); +} + static void free_event_desc(struct perf_evsel *events) { struct perf_evsel *evsel; @@ -2373,6 +2399,21 @@ static int process_clockid(struct feat_fd *ff, return 0; } +static int process_dir_format(struct feat_fd *ff, + void *_data __maybe_unused) +{ + struct perf_session *session; + struct perf_data *data; + + session = container_of(ff->ph, struct perf_session, header); + data = session->data; + + if (WARN_ON(!perf_data__is_dir(data))) + return -1; + + return do_read_u64(ff, &data->dir.version); +} + struct feature_ops { int (*write)(struct feat_fd *ff, struct perf_evlist *evlist); void (*print)(struct feat_fd *ff, FILE *fp); @@ -2432,7 +2473,8 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPN(CACHE, cache, true), FEAT_OPR(SAMPLE_TIME, sample_time, false), FEAT_OPR(MEM_TOPOLOGY, mem_topology, true), - FEAT_OPR(CLOCKID, clockid, false) + FEAT_OPR(CLOCKID, clockid, false), + FEAT_OPN(DIR_FORMAT, dir_format, false) }; struct header_print_data { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 0d553ddca0a3..6a231340238d 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -39,6 +39,7 @@ enum { HEADER_SAMPLE_TIME, HEADER_MEM_TOPOLOGY, HEADER_CLOCKID, + HEADER_DIR_FORMAT, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; @@ -48,6 +49,10 @@ enum perf_header_version { PERF_HEADER_VERSION_2, }; +enum perf_dir_version { + PERF_DIR_VERSION = 1, +}; + struct perf_file_section { u64 offset; u64 size; -- cgit From e51f806198306a8ad7ae6e34d1af0716ef73da80 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 8 Mar 2019 14:47:40 +0100 Subject: perf session: Add process callback to reader object Adding callback function to reader object so callers can process data in different ways. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Alexey Budankov Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/20190308134745.5057-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index de777bdc0ed3..0ec34227bd60 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1847,10 +1847,17 @@ fetch_mmaped_event(struct perf_session *session, #define NUM_MMAPS 128 #endif +struct reader; + +typedef s64 (*reader_cb_t)(struct perf_session *session, + union perf_event *event, + u64 file_offset); + struct reader { - int fd; - u64 data_size; - u64 data_offset; + int fd; + u64 data_size; + u64 data_offset; + reader_cb_t process; }; static int @@ -1921,7 +1928,7 @@ more: size = event->header.size; if (size < sizeof(struct perf_event_header) || - (skip = perf_session__process_event(session, event, file_pos)) < 0) { + (skip = rd->process(session, event, file_pos)) < 0) { pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n", file_offset + head, event->header.size, event->header.type); @@ -1947,12 +1954,20 @@ out: return err; } +static s64 process_simple(struct perf_session *session, + union perf_event *event, + u64 file_offset) +{ + return perf_session__process_event(session, event, file_offset); +} + static int __perf_session__process_events(struct perf_session *session) { struct reader rd = { .fd = perf_data__fd(session->data), .data_size = session->header.data_size, .data_offset = session->header.data_offset, + .process = process_simple, }; struct ordered_events *oe = &session->ordered_events; struct perf_tool *tool = session->tool; -- cgit From b41fdc4a7bf9045e4871c5b15905ea732ffd044f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 11 Mar 2019 15:38:10 +0000 Subject: irqchip/gic: Drop support for secondary GIC in non-DT systems We do not have any in-tree platform with this pathological setup, and only a single system (Cavium's cns3xxx) isn't DT aware. Let's drop the secondary GIC support for now, until we remove the above horror altogether. Signed-off-by: Marc Zyngier --- arch/arm/mach-cns3xxx/core.c | 2 +- drivers/irqchip/irq-gic.c | 45 +++++++++++++++-------------------------- include/linux/irqchip/arm-gic.h | 3 +-- 3 files changed, 18 insertions(+), 32 deletions(-) diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c index 7d5a44a06648..f676592d8402 100644 --- a/arch/arm/mach-cns3xxx/core.c +++ b/arch/arm/mach-cns3xxx/core.c @@ -90,7 +90,7 @@ void __init cns3xxx_map_io(void) /* used by entry-macro.S */ void __init cns3xxx_init_irq(void) { - gic_init(0, 29, IOMEM(CNS3XXX_TC11MP_GIC_DIST_BASE_VIRT), + gic_init(IOMEM(CNS3XXX_TC11MP_GIC_DIST_BASE_VIRT), IOMEM(CNS3XXX_TC11MP_GIC_CPU_BASE_VIRT)); } diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index ba2a37a27a54..fd3110c171ba 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -1089,11 +1089,10 @@ static void gic_init_chip(struct gic_chip_data *gic, struct device *dev, #endif } -static int gic_init_bases(struct gic_chip_data *gic, int irq_start, +static int gic_init_bases(struct gic_chip_data *gic, struct fwnode_handle *handle) { - irq_hw_number_t hwirq_base; - int gic_irqs, irq_base, ret; + int gic_irqs, ret; if (IS_ENABLED(CONFIG_GIC_NON_BANKED) && gic->percpu_offset) { /* Frankein-GIC without banked registers... */ @@ -1145,28 +1144,21 @@ static int gic_init_bases(struct gic_chip_data *gic, int irq_start, } else { /* Legacy support */ /* * For primary GICs, skip over SGIs. - * For secondary GICs, skip over PPIs, too. + * No secondary GIC support whatsoever. */ - if (gic == &gic_data[0] && (irq_start & 31) > 0) { - hwirq_base = 16; - if (irq_start != -1) - irq_start = (irq_start & ~31) + 16; - } else { - hwirq_base = 32; - } + int irq_base; - gic_irqs -= hwirq_base; /* calculate # of irqs to allocate */ + gic_irqs -= 16; /* calculate # of irqs to allocate */ - irq_base = irq_alloc_descs(irq_start, 16, gic_irqs, + irq_base = irq_alloc_descs(16, 16, gic_irqs, numa_node_id()); if (irq_base < 0) { - WARN(1, "Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n", - irq_start); - irq_base = irq_start; + WARN(1, "Cannot allocate irq_descs @ IRQ16, assuming pre-allocated\n"); + irq_base = 16; } gic->domain = irq_domain_add_legacy(NULL, gic_irqs, irq_base, - hwirq_base, &gic_irq_domain_ops, gic); + 16, &gic_irq_domain_ops, gic); } if (WARN_ON(!gic->domain)) { @@ -1195,7 +1187,6 @@ error: } static int __init __gic_init_bases(struct gic_chip_data *gic, - int irq_start, struct fwnode_handle *handle) { char *name; @@ -1231,32 +1222,28 @@ static int __init __gic_init_bases(struct gic_chip_data *gic, gic_init_chip(gic, NULL, name, false); } - ret = gic_init_bases(gic, irq_start, handle); + ret = gic_init_bases(gic, handle); if (ret) kfree(name); return ret; } -void __init gic_init(unsigned int gic_nr, int irq_start, - void __iomem *dist_base, void __iomem *cpu_base) +void __init gic_init(void __iomem *dist_base, void __iomem *cpu_base) { struct gic_chip_data *gic; - if (WARN_ON(gic_nr >= CONFIG_ARM_GIC_MAX_NR)) - return; - /* * Non-DT/ACPI systems won't run a hypervisor, so let's not * bother with these... */ static_branch_disable(&supports_deactivate_key); - gic = &gic_data[gic_nr]; + gic = &gic_data[0]; gic->raw_dist_base = dist_base; gic->raw_cpu_base = cpu_base; - __gic_init_bases(gic, irq_start, NULL); + __gic_init_bases(gic, NULL); } static void gic_teardown(struct gic_chip_data *gic) @@ -1399,7 +1386,7 @@ int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq) if (ret) return ret; - ret = gic_init_bases(*gic, -1, &dev->of_node->fwnode); + ret = gic_init_bases(*gic, &dev->of_node->fwnode); if (ret) { gic_teardown(*gic); return ret; @@ -1459,7 +1446,7 @@ gic_of_init(struct device_node *node, struct device_node *parent) if (gic_cnt == 0 && !gic_check_eoimode(node, &gic->raw_cpu_base)) static_branch_disable(&supports_deactivate_key); - ret = __gic_init_bases(gic, -1, &node->fwnode); + ret = __gic_init_bases(gic, &node->fwnode); if (ret) { gic_teardown(gic); return ret; @@ -1650,7 +1637,7 @@ static int __init gic_v2_acpi_init(struct acpi_subtable_header *header, return -ENOMEM; } - ret = __gic_init_bases(gic, -1, domain_handle); + ret = __gic_init_bases(gic, domain_handle); if (ret) { pr_err("Failed to initialise GIC\n"); irq_domain_free_fwnode(domain_handle); diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 626179077bb0..0f049b384ccd 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -158,8 +158,7 @@ int gic_of_init_child(struct device *dev, struct gic_chip_data **gic, int irq); * Legacy platforms not converted to DT yet must use this to init * their GIC */ -void gic_init(unsigned int nr, int start, - void __iomem *dist , void __iomem *cpu); +void gic_init(void __iomem *dist , void __iomem *cpu); int gicv2m_init(struct fwnode_handle *parent_handle, struct irq_domain *parent); -- cgit From 75065a85a9705ad4c0135f07fd4467d46ff342a3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 8 Mar 2019 21:56:20 -0800 Subject: perf report: Use less for scripts output The UI viewer for scripts output has a lot of limitations: limited size, no search or save function, slow, and various other issues. Just use 'less' to display directly on the terminal instead. This won't work in GTK mode, but GTK doesn't support these context menus anyways. If that is ever done could use an terminal for the output. Signed-off-by: Andi Kleen Acked-by: Feng Tang Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20190309055628.21617-8-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/scripts.c | 130 +++++---------------------------------- 1 file changed, 17 insertions(+), 113 deletions(-) diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c index 90a32ac69e76..7f36630694bf 100644 --- a/tools/perf/ui/browsers/scripts.c +++ b/tools/perf/ui/browsers/scripts.c @@ -1,35 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include #include "../../util/sort.h" #include "../../util/util.h" #include "../../util/hist.h" #include "../../util/debug.h" #include "../../util/symbol.h" #include "../browser.h" -#include "../helpline.h" #include "../libslang.h" -/* 2048 lines should be enough for a script output */ -#define MAX_LINES 2048 - -/* 160 bytes for one output line */ -#define AVERAGE_LINE_LEN 160 - -struct script_line { - struct list_head node; - char line[AVERAGE_LINE_LEN]; -}; - -struct perf_script_browser { - struct ui_browser b; - struct list_head entries; - const char *script_name; - int nr_lines; -}; - #define SCRIPT_NAMELEN 128 #define SCRIPT_MAX_NO 64 /* @@ -73,69 +50,29 @@ static int list_scripts(char *script_name) return ret; } -static void script_browser__write(struct ui_browser *browser, - void *entry, int row) +static void run_script(char *cmd) { - struct script_line *sline = list_entry(entry, struct script_line, node); - bool current_entry = ui_browser__is_current_entry(browser, row); - - ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED : - HE_COLORSET_NORMAL); - - ui_browser__write_nstring(browser, sline->line, browser->width); + pr_debug("Running %s\n", cmd); + SLang_reset_tty(); + if (system(cmd) < 0) + pr_warning("Cannot run %s\n", cmd); + /* + * SLang doesn't seem to reset the whole terminal, so be more + * forceful to get back to the original state. + */ + printf("\033[c\033[H\033[J"); + fflush(stdout); + SLang_init_tty(0, 0, 0); + SLsmg_refresh(); } -static int script_browser__run(struct perf_script_browser *browser) -{ - int key; - - if (ui_browser__show(&browser->b, browser->script_name, - "Press ESC to exit") < 0) - return -1; - - while (1) { - key = ui_browser__run(&browser->b, 0); - - /* We can add some special key handling here if needed */ - break; - } - - ui_browser__hide(&browser->b); - return key; -} - - int script_browse(const char *script_opt) { char cmd[SCRIPT_FULLPATH_LEN*2], script_name[SCRIPT_FULLPATH_LEN]; - char *line = NULL; - size_t len = 0; - ssize_t retlen; - int ret = -1, nr_entries = 0; - FILE *fp; - void *buf; - struct script_line *sline; - - struct perf_script_browser script = { - .b = { - .refresh = ui_browser__list_head_refresh, - .seek = ui_browser__list_head_seek, - .write = script_browser__write, - }, - .script_name = script_name, - }; - - INIT_LIST_HEAD(&script.entries); - - /* Save each line of the output in one struct script_line object. */ - buf = zalloc((sizeof(*sline)) * MAX_LINES); - if (!buf) - return -1; - sline = buf; memset(script_name, 0, SCRIPT_FULLPATH_LEN); if (list_scripts(script_name)) - goto exit; + return -1; sprintf(cmd, "perf script -s %s ", script_name); @@ -147,42 +84,9 @@ int script_browse(const char *script_opt) strcat(cmd, input_name); } - strcat(cmd, " 2>&1"); - - fp = popen(cmd, "r"); - if (!fp) - goto exit; - - while ((retlen = getline(&line, &len, fp)) != -1) { - strncpy(sline->line, line, AVERAGE_LINE_LEN); - - /* If one output line is very large, just cut it short */ - if (retlen >= AVERAGE_LINE_LEN) { - sline->line[AVERAGE_LINE_LEN - 1] = '\0'; - sline->line[AVERAGE_LINE_LEN - 2] = '\n'; - } - list_add_tail(&sline->node, &script.entries); - - if (script.b.width < retlen) - script.b.width = retlen; - - if (nr_entries++ >= MAX_LINES - 1) - break; - sline++; - } - - if (script.b.width > AVERAGE_LINE_LEN) - script.b.width = AVERAGE_LINE_LEN; - - free(line); - pclose(fp); + strcat(cmd, " 2>&1 | less"); - script.nr_lines = nr_entries; - script.b.nr_entries = nr_entries; - script.b.entries = &script.entries; + run_script(cmd); - ret = script_browser__run(&script); -exit: - free(buf); - return ret; + return 0; } -- cgit From 0d9c038feff6f834ad9e5d88b66715235ab23ff3 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Mon, 18 Feb 2019 12:01:35 -0500 Subject: zcrypt: handle AP Info notification from CHSC SEI command The current AP bus implementation periodically polls the AP configuration to detect changes. When the AP configuration is dynamically changed via the SE or an SCLP instruction, the changes will not be reflected to sysfs until the next time the AP configuration is polled. The CHSC architecture provides a Store Event Information (SEI) command to make notification of an AP configuration change. This patch introduces a handler to process notification from the CHSC SEI command by immediately kicking off an AP bus scan-after-event. Signed-off-by: Tony Krowiak Reviewed-by: Halil Pasic Reviewed-by: Sebastian Ott Reviewed-by: Harald Freudenberger Reviewed-by: Cornelia Huck Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ap.h | 11 +++++++++++ drivers/s390/cio/chsc.c | 13 +++++++++++++ drivers/s390/crypto/ap_bus.c | 10 ++++++++++ 3 files changed, 34 insertions(+) diff --git a/arch/s390/include/asm/ap.h b/arch/s390/include/asm/ap.h index 1a6a7092d942..e94a0a28b5eb 100644 --- a/arch/s390/include/asm/ap.h +++ b/arch/s390/include/asm/ap.h @@ -360,4 +360,15 @@ static inline struct ap_queue_status ap_dqap(ap_qid_t qid, return reg1; } +/* + * Interface to tell the AP bus code that a configuration + * change has happened. The bus code should at least do + * an ap bus resource rescan. + */ +#if IS_ENABLED(CONFIG_ZCRYPT) +void ap_bus_cfg_chg(void); +#else +static inline void ap_bus_cfg_chg(void){}; +#endif + #endif /* _ASM_S390_AP_H_ */ diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c index a0baee25134c..eaf4699be2c5 100644 --- a/drivers/s390/cio/chsc.c +++ b/drivers/s390/cio/chsc.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "css.h" #include "cio.h" @@ -586,6 +587,15 @@ static void chsc_process_sei_scm_avail(struct chsc_sei_nt0_area *sei_area) " failed (rc=%d).\n", ret); } +static void chsc_process_sei_ap_cfg_chg(struct chsc_sei_nt0_area *sei_area) +{ + CIO_CRW_EVENT(3, "chsc: ap config changed\n"); + if (sei_area->rs != 5) + return; + + ap_bus_cfg_chg(); +} + static void chsc_process_sei_nt2(struct chsc_sei_nt2_area *sei_area) { switch (sei_area->cc) { @@ -612,6 +622,9 @@ static void chsc_process_sei_nt0(struct chsc_sei_nt0_area *sei_area) case 2: /* i/o resource accessibility */ chsc_process_sei_res_acc(sei_area); break; + case 3: /* ap config changed */ + chsc_process_sei_ap_cfg_chg(sei_area); + break; case 7: /* channel-path-availability information */ chsc_process_sei_chp_avail(sei_area); break; diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 033a1acabf48..1546389d71db 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -867,6 +867,16 @@ void ap_bus_force_rescan(void) } EXPORT_SYMBOL(ap_bus_force_rescan); +/* +* A config change has happened, force an ap bus rescan. +*/ +void ap_bus_cfg_chg(void) +{ + AP_DBF(DBF_INFO, "%s config change, forcing bus rescan\n", __func__); + + ap_bus_force_rescan(); +} + /* * hex2bitmap() - parse hex mask string and set bitmap. * Valid strings are "0x012345678" with at least one valid hex number. -- cgit From 5f5f67da9781770df0403269bc57d7aae608fecd Mon Sep 17 00:00:00 2001 From: Yifeng Li Date: Tue, 5 Mar 2019 06:00:22 +0800 Subject: mips: loongson64: lemote-2f: Add IRQF_NO_SUSPEND to "cascade" irqaction. Timekeeping IRQs from CS5536 MFGPT are routed to i8259, which then triggers the "cascade" IRQ on MIPS CPU. Without IRQF_NO_SUSPEND in cascade_irqaction, MFGPT interrupts will be masked in suspend mode, and the machine would be unable to resume once suspended. Previously, MIPS IRQs were not disabled properly, so the original code appeared to work. Commit a3e6c1eff5 ("MIPS: IRQ: Fix disable_irq on CPU IRQs") uncovers the bug. To fix it, add IRQF_NO_SUSPEND to cascade_irqaction. This commit is functionally identical to 0add9c2f1cff ("MIPS: Loongson-3: Add IRQF_NO_SUSPEND to Cascade irqaction"), but it forgot to apply the same fix to Loongson2. Signed-off-by: Yifeng Li Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: Jiaxun Yang Cc: Huacai Chen Cc: Ralf Baechle Cc: James Hogan Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v3.19+ --- arch/mips/loongson64/lemote-2f/irq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/loongson64/lemote-2f/irq.c b/arch/mips/loongson64/lemote-2f/irq.c index 9e33e45aa17c..b213cecb8e3a 100644 --- a/arch/mips/loongson64/lemote-2f/irq.c +++ b/arch/mips/loongson64/lemote-2f/irq.c @@ -103,7 +103,7 @@ static struct irqaction ip6_irqaction = { static struct irqaction cascade_irqaction = { .handler = no_action, .name = "cascade", - .flags = IRQF_NO_THREAD, + .flags = IRQF_NO_THREAD | IRQF_NO_SUSPEND, }; void __init mach_init_irq(void) -- cgit From 3f0a53bc6482fb09770982a8447981260ea258dc Mon Sep 17 00:00:00 2001 From: Yasha Cherikovsky Date: Fri, 8 Mar 2019 14:58:51 +0200 Subject: MIPS: Ensure ELF appended dtb is relocated This fixes booting with the combination of CONFIG_RELOCATABLE=y and CONFIG_MIPS_ELF_APPENDED_DTB=y. Sections that appear after the relocation table are not relocated on system boot (except .bss, which has special handling). With CONFIG_MIPS_ELF_APPENDED_DTB, the dtb is part of the vmlinux ELF, so it must be relocated together with everything else. Fixes: 069fd766271d ("MIPS: Reserve space for relocation table") Signed-off-by: Yasha Cherikovsky Signed-off-by: Paul Burton Cc: Ralf Baechle Cc: Paul Burton Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v4.7+ --- arch/mips/kernel/vmlinux.lds.S | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index cb7e9ed7a453..33ee0d18fb0a 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -140,6 +140,13 @@ SECTIONS PERCPU_SECTION(1 << CONFIG_MIPS_L1_CACHE_SHIFT) #endif +#ifdef CONFIG_MIPS_ELF_APPENDED_DTB + .appended_dtb : AT(ADDR(.appended_dtb) - LOAD_OFFSET) { + *(.appended_dtb) + KEEP(*(.appended_dtb)) + } +#endif + #ifdef CONFIG_RELOCATABLE . = ALIGN(4); @@ -164,11 +171,6 @@ SECTIONS __appended_dtb = .; /* leave space for appended DTB */ . += 0x100000; -#elif defined(CONFIG_MIPS_ELF_APPENDED_DTB) - .appended_dtb : AT(ADDR(.appended_dtb) - LOAD_OFFSET) { - *(.appended_dtb) - KEEP(*(.appended_dtb)) - } #endif /* * Align to 64K in attempt to eliminate holes before the -- cgit From 47c25036b60f27b86ab44b66a8861bcf81cde39b Mon Sep 17 00:00:00 2001 From: Archer Yan Date: Fri, 8 Mar 2019 03:29:19 +0000 Subject: MIPS: Fix kernel crash for R6 in jump label branch function Insert Branch instruction instead of NOP to make sure assembler don't patch code in forbidden slot. In jump label function, it might be possible to patch Control Transfer Instructions(CTIs) into forbidden slot, which will generate Reserved Instruction exception in MIPS release 6. Signed-off-by: Archer Yan Reviewed-by: Paul Burton [paul.burton@mips.com: - Add MIPS prefix to subject. - Mark for stable from v4.0, which introduced r6 support, onwards.] Signed-off-by: Paul Burton Cc: linux-mips@vger.kernel.org Cc: stable@vger.kernel.org # v4.0+ --- arch/mips/include/asm/jump_label.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h index e77672539e8e..e4456e450f94 100644 --- a/arch/mips/include/asm/jump_label.h +++ b/arch/mips/include/asm/jump_label.h @@ -21,15 +21,15 @@ #endif #ifdef CONFIG_CPU_MICROMIPS -#define NOP_INSN "nop32" +#define B_INSN "b32" #else -#define NOP_INSN "nop" +#define B_INSN "b" #endif static __always_inline bool arch_static_branch(struct static_key *key, bool branch) { - asm_volatile_goto("1:\t" NOP_INSN "\n\t" - "nop\n\t" + asm_volatile_goto("1:\t" B_INSN " 2f\n\t" + "2:\tnop\n\t" ".pushsection __jump_table, \"aw\"\n\t" WORD_INSN " 1b, %l[l_yes], %0\n\t" ".popsection\n\t" -- cgit From beda0e725e5f06aca27eda2434ea9447dad88e36 Mon Sep 17 00:00:00 2001 From: Tony Jones Date: Fri, 8 Mar 2019 16:05:15 -0800 Subject: perf script python: Add Python3 support to exported-sql-viewer.py Support both Python2 and Python3 in the exported-sql-viewer.py script. The use of 'from __future__' implies the minimum supported Python2 version is now v2.6 Signed-off-by: Tony Jones Acked-by: Adrian Hunter Link: http://lkml.kernel.org/r/20190309000518.2438-2-tonyj@suse.de Signed-off-by: Seeteena Thoufeek Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 42 ++++++++++++++++-------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index afec9479ca7f..e38518cdcbc3 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -88,11 +88,20 @@ # 7fab593ea956 48 89 15 3b 13 22 00 movq %rdx, 0x22133b(%rip) # 8107675243232 2 ls 22011 22011 hardware interrupt No 7fab593ea956 _dl_start+0x26 (ld-2.19.so) -> ffffffff86a012e0 page_fault ([kernel]) +from __future__ import print_function + import sys import weakref import threading import string -import cPickle +try: + # Python2 + import cPickle as pickle + # size of pickled integer big enough for record size + glb_nsz = 8 +except ImportError: + import pickle + glb_nsz = 16 import re import os from PySide.QtCore import * @@ -102,6 +111,15 @@ from decimal import * from ctypes import * from multiprocessing import Process, Array, Value, Event +# xrange is range in Python3 +try: + xrange +except NameError: + xrange = range + +def printerr(*args, **keyword_args): + print(*args, file=sys.stderr, **keyword_args) + # Data formatting helpers def tohex(ip): @@ -1004,10 +1022,6 @@ class ChildDataItemFinder(): glb_chunk_sz = 10000 -# size of pickled integer big enough for record size - -glb_nsz = 8 - # Background process for SQL data fetcher class SQLFetcherProcess(): @@ -1066,7 +1080,7 @@ class SQLFetcherProcess(): return True if space >= glb_nsz: # Use 0 (or space < glb_nsz) to mean there is no more at the top of the buffer - nd = cPickle.dumps(0, cPickle.HIGHEST_PROTOCOL) + nd = pickle.dumps(0, pickle.HIGHEST_PROTOCOL) self.buffer[self.local_head : self.local_head + len(nd)] = nd self.local_head = 0 if self.local_tail - self.local_head > sz: @@ -1084,9 +1098,9 @@ class SQLFetcherProcess(): self.wait_event.wait() def AddToBuffer(self, obj): - d = cPickle.dumps(obj, cPickle.HIGHEST_PROTOCOL) + d = pickle.dumps(obj, pickle.HIGHEST_PROTOCOL) n = len(d) - nd = cPickle.dumps(n, cPickle.HIGHEST_PROTOCOL) + nd = pickle.dumps(n, pickle.HIGHEST_PROTOCOL) sz = n + glb_nsz self.WaitForSpace(sz) pos = self.local_head @@ -1198,12 +1212,12 @@ class SQLFetcher(QObject): pos = self.local_tail if len(self.buffer) - pos < glb_nsz: pos = 0 - n = cPickle.loads(self.buffer[pos : pos + glb_nsz]) + n = pickle.loads(self.buffer[pos : pos + glb_nsz]) if n == 0: pos = 0 - n = cPickle.loads(self.buffer[0 : glb_nsz]) + n = pickle.loads(self.buffer[0 : glb_nsz]) pos += glb_nsz - obj = cPickle.loads(self.buffer[pos : pos + n]) + obj = pickle.loads(self.buffer[pos : pos + n]) self.local_tail = pos + n return obj @@ -2973,7 +2987,7 @@ class DBRef(): def Main(): if (len(sys.argv) < 2): - print >> sys.stderr, "Usage is: exported-sql-viewer.py { | --help-only}" + printerr("Usage is: exported-sql-viewer.py { | --help-only}"); raise Exception("Too few arguments") dbname = sys.argv[1] @@ -2986,8 +3000,8 @@ def Main(): is_sqlite3 = False try: - f = open(dbname) - if f.read(15) == "SQLite format 3": + f = open(dbname, "rb") + if f.read(15) == b'SQLite format 3': is_sqlite3 = True f.close() except: -- cgit From 1937b0560c3ea43b1b0f7d3617949ca50de8f8c0 Mon Sep 17 00:00:00 2001 From: Tony Jones Date: Fri, 8 Mar 2019 16:05:16 -0800 Subject: perf script python: Add Python3 support to export-to-postgresql.py Support both Python2 and Python3 in the export-to-postgresql.py script. The use of 'from __future__' implies the minimum supported Python2 version is now v2.6 Signed-off-by: Tony Jones Link: http://lkml.kernel.org/r/20190309000518.2438-3-tonyj@suse.de Signed-off-by: Adrian Hunter Signed-off-by: Seeteena Thoufeek Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-postgresql.py | 58 ++++++++++++++++------- 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 390a351d15ea..00ab972a2eba 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -10,6 +10,8 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. +from __future__ import print_function + import os import sys import struct @@ -199,6 +201,18 @@ import datetime from PySide.QtSql import * +if sys.version_info < (3, 0): + def toserverstr(str): + return str + def toclientstr(str): + return str +else: + # Assume UTF-8 server_encoding and client_encoding + def toserverstr(str): + return bytes(str, "UTF_8") + def toclientstr(str): + return bytes(str, "UTF_8") + # Need to access PostgreSQL C library directly to use COPY FROM STDIN from ctypes import * libpq = CDLL("libpq.so.5") @@ -234,12 +248,14 @@ perf_db_export_mode = True perf_db_export_calls = False perf_db_export_callchains = False +def printerr(*args, **kw_args): + print(*args, file=sys.stderr, **kw_args) def usage(): - print >> sys.stderr, "Usage is: export-to-postgresql.py [] [] []" - print >> sys.stderr, "where: columns 'all' or 'branches'" - print >> sys.stderr, " calls 'calls' => create calls and call_paths table" - print >> sys.stderr, " callchains 'callchains' => create call_paths table" + printerr("Usage is: export-to-postgresql.py [] [] []") + printerr("where: columns 'all' or 'branches'") + printerr(" calls 'calls' => create calls and call_paths table") + printerr(" callchains 'callchains' => create call_paths table") raise Exception("Too few arguments") if (len(sys.argv) < 2): @@ -273,7 +289,7 @@ def do_query(q, s): return raise Exception("Query failed: " + q.lastError().text()) -print datetime.datetime.today(), "Creating database..." +print(datetime.datetime.today(), "Creating database...") db = QSqlDatabase.addDatabase('QPSQL') query = QSqlQuery(db) @@ -506,12 +522,12 @@ do_query(query, 'CREATE VIEW samples_view AS ' ' FROM samples') -file_header = struct.pack("!11sii", "PGCOPY\n\377\r\n\0", 0, 0) -file_trailer = "\377\377" +file_header = struct.pack("!11sii", b"PGCOPY\n\377\r\n\0", 0, 0) +file_trailer = b"\377\377" def open_output_file(file_name): path_name = output_dir_name + "/" + file_name - file = open(path_name, "w+") + file = open(path_name, "wb+") file.write(file_header) return file @@ -526,13 +542,13 @@ def copy_output_file_direct(file, table_name): # Use COPY FROM STDIN because security may prevent postgres from accessing the files directly def copy_output_file(file, table_name): - conn = PQconnectdb("dbname = " + dbname) + conn = PQconnectdb(toclientstr("dbname = " + dbname)) if (PQstatus(conn)): raise Exception("COPY FROM STDIN PQconnectdb failed") file.write(file_trailer) file.seek(0) sql = "COPY " + table_name + " FROM STDIN (FORMAT 'binary')" - res = PQexec(conn, sql) + res = PQexec(conn, toclientstr(sql)) if (PQresultStatus(res) != 4): raise Exception("COPY FROM STDIN PQexec failed") data = file.read(65536) @@ -566,7 +582,7 @@ if perf_db_export_calls: call_file = open_output_file("call_table.bin") def trace_begin(): - print datetime.datetime.today(), "Writing to intermediate files..." + print(datetime.datetime.today(), "Writing to intermediate files...") # id == 0 means unknown. It is easier to create records for them than replace the zeroes with NULLs evsel_table(0, "unknown") machine_table(0, 0, "unknown") @@ -582,7 +598,7 @@ def trace_begin(): unhandled_count = 0 def trace_end(): - print datetime.datetime.today(), "Copying to database..." + print(datetime.datetime.today(), "Copying to database...") copy_output_file(evsel_file, "selected_events") copy_output_file(machine_file, "machines") copy_output_file(thread_file, "threads") @@ -597,7 +613,7 @@ def trace_end(): if perf_db_export_calls: copy_output_file(call_file, "calls") - print datetime.datetime.today(), "Removing intermediate files..." + print(datetime.datetime.today(), "Removing intermediate files...") remove_output_file(evsel_file) remove_output_file(machine_file) remove_output_file(thread_file) @@ -612,7 +628,7 @@ def trace_end(): if perf_db_export_calls: remove_output_file(call_file) os.rmdir(output_dir_name) - print datetime.datetime.today(), "Adding primary keys" + print(datetime.datetime.today(), "Adding primary keys") do_query(query, 'ALTER TABLE selected_events ADD PRIMARY KEY (id)') do_query(query, 'ALTER TABLE machines ADD PRIMARY KEY (id)') do_query(query, 'ALTER TABLE threads ADD PRIMARY KEY (id)') @@ -627,7 +643,7 @@ def trace_end(): if perf_db_export_calls: do_query(query, 'ALTER TABLE calls ADD PRIMARY KEY (id)') - print datetime.datetime.today(), "Adding foreign keys" + print(datetime.datetime.today(), "Adding foreign keys") do_query(query, 'ALTER TABLE threads ' 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id),' 'ADD CONSTRAINT processfk FOREIGN KEY (process_id) REFERENCES threads (id)') @@ -663,8 +679,8 @@ def trace_end(): do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)') if (unhandled_count): - print datetime.datetime.today(), "Warning: ", unhandled_count, " unhandled events" - print datetime.datetime.today(), "Done" + print(datetime.datetime.today(), "Warning: ", unhandled_count, " unhandled events") + print(datetime.datetime.today(), "Done") def trace_unhandled(event_name, context, event_fields_dict): global unhandled_count @@ -674,12 +690,14 @@ def sched__sched_switch(*x): pass def evsel_table(evsel_id, evsel_name, *x): + evsel_name = toserverstr(evsel_name) n = len(evsel_name) fmt = "!hiqi" + str(n) + "s" value = struct.pack(fmt, 2, 8, evsel_id, n, evsel_name) evsel_file.write(value) def machine_table(machine_id, pid, root_dir, *x): + root_dir = toserverstr(root_dir) n = len(root_dir) fmt = "!hiqiii" + str(n) + "s" value = struct.pack(fmt, 3, 8, machine_id, 4, pid, n, root_dir) @@ -690,6 +708,7 @@ def thread_table(thread_id, machine_id, process_id, pid, tid, *x): thread_file.write(value) def comm_table(comm_id, comm_str, *x): + comm_str = toserverstr(comm_str) n = len(comm_str) fmt = "!hiqi" + str(n) + "s" value = struct.pack(fmt, 2, 8, comm_id, n, comm_str) @@ -701,6 +720,9 @@ def comm_thread_table(comm_thread_id, comm_id, thread_id, *x): comm_thread_file.write(value) def dso_table(dso_id, machine_id, short_name, long_name, build_id, *x): + short_name = toserverstr(short_name) + long_name = toserverstr(long_name) + build_id = toserverstr(build_id) n1 = len(short_name) n2 = len(long_name) n3 = len(build_id) @@ -709,12 +731,14 @@ def dso_table(dso_id, machine_id, short_name, long_name, build_id, *x): dso_file.write(value) def symbol_table(symbol_id, dso_id, sym_start, sym_end, binding, symbol_name, *x): + symbol_name = toserverstr(symbol_name) n = len(symbol_name) fmt = "!hiqiqiqiqiii" + str(n) + "s" value = struct.pack(fmt, 6, 8, symbol_id, 8, dso_id, 8, sym_start, 8, sym_end, 4, binding, n, symbol_name) symbol_file.write(value) def branch_type_table(branch_type, name, *x): + name = toserverstr(name) n = len(name) fmt = "!hiii" + str(n) + "s" value = struct.pack(fmt, 2, 4, branch_type, n, name) -- cgit From ebf6c5c181abe9309788c6241d39602a1ce18723 Mon Sep 17 00:00:00 2001 From: Tony Jones Date: Fri, 8 Mar 2019 16:05:17 -0800 Subject: perf script python: Add Python3 support to export-to-sqlite.py Support both Python2 and Python3 in the export-to-sqlite.py script The use of 'from __future__' implies the minimum supported Python2 version is now v2.6 Signed-off-by: Tony Jones Acked-by: Adrian Hunter Link: http://lkml.kernel.org/r/20190309000518.2438-4-tonyj@suse.de Signed-off-by: Seeteena Thoufeek Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-sqlite.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index eb63e6c7107f..3da338243aed 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py @@ -10,6 +10,8 @@ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. +from __future__ import print_function + import os import sys import struct @@ -60,11 +62,14 @@ perf_db_export_mode = True perf_db_export_calls = False perf_db_export_callchains = False +def printerr(*args, **keyword_args): + print(*args, file=sys.stderr, **keyword_args) + def usage(): - print >> sys.stderr, "Usage is: export-to-sqlite.py [] [] []" - print >> sys.stderr, "where: columns 'all' or 'branches'" - print >> sys.stderr, " calls 'calls' => create calls and call_paths table" - print >> sys.stderr, " callchains 'callchains' => create call_paths table" + printerr("Usage is: export-to-sqlite.py [] [] []"); + printerr("where: columns 'all' or 'branches'"); + printerr(" calls 'calls' => create calls and call_paths table"); + printerr(" callchains 'callchains' => create call_paths table"); raise Exception("Too few arguments") if (len(sys.argv) < 2): @@ -100,7 +105,7 @@ def do_query_(q): return raise Exception("Query failed: " + q.lastError().text()) -print datetime.datetime.today(), "Creating database..." +print(datetime.datetime.today(), "Creating database ...") db_exists = False try: @@ -378,7 +383,7 @@ if perf_db_export_calls: call_query.prepare("INSERT INTO calls VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") def trace_begin(): - print datetime.datetime.today(), "Writing records..." + print(datetime.datetime.today(), "Writing records...") do_query(query, 'BEGIN TRANSACTION') # id == 0 means unknown. It is easier to create records for them than replace the zeroes with NULLs evsel_table(0, "unknown") @@ -397,14 +402,14 @@ unhandled_count = 0 def trace_end(): do_query(query, 'END TRANSACTION') - print datetime.datetime.today(), "Adding indexes" + print(datetime.datetime.today(), "Adding indexes") if perf_db_export_calls: do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)') do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)') if (unhandled_count): - print datetime.datetime.today(), "Warning: ", unhandled_count, " unhandled events" - print datetime.datetime.today(), "Done" + print(datetime.datetime.today(), "Warning: ", unhandled_count, " unhandled events") + print(datetime.datetime.today(), "Done") def trace_unhandled(event_name, context, event_fields_dict): global unhandled_count -- cgit From 49f93bbf17e6267eb34e0c12a9813f3a8723749e Mon Sep 17 00:00:00 2001 From: Tony Jones Date: Fri, 8 Mar 2019 16:05:18 -0800 Subject: perf script python: Add printdate function to SQL exporters Introduce a printdate function to eliminate the repetitive use of datetime.datetime.today() in the SQL exporting scripts. Signed-off-by: Tony Jones Acked-by: Adrian Hunter Link: http://lkml.kernel.org/r/20190309000518.2438-5-tonyj@suse.de Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/export-to-postgresql.py | 19 +++++++++++-------- tools/perf/scripts/python/export-to-sqlite.py | 13 ++++++++----- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 00ab972a2eba..c3eae1d77d36 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -251,6 +251,9 @@ perf_db_export_callchains = False def printerr(*args, **kw_args): print(*args, file=sys.stderr, **kw_args) +def printdate(*args, **kw_args): + print(datetime.datetime.today(), *args, sep=' ', **kw_args) + def usage(): printerr("Usage is: export-to-postgresql.py [] [] []") printerr("where: columns 'all' or 'branches'") @@ -289,7 +292,7 @@ def do_query(q, s): return raise Exception("Query failed: " + q.lastError().text()) -print(datetime.datetime.today(), "Creating database...") +printdate("Creating database...") db = QSqlDatabase.addDatabase('QPSQL') query = QSqlQuery(db) @@ -582,7 +585,7 @@ if perf_db_export_calls: call_file = open_output_file("call_table.bin") def trace_begin(): - print(datetime.datetime.today(), "Writing to intermediate files...") + printdate("Writing to intermediate files...") # id == 0 means unknown. It is easier to create records for them than replace the zeroes with NULLs evsel_table(0, "unknown") machine_table(0, 0, "unknown") @@ -598,7 +601,7 @@ def trace_begin(): unhandled_count = 0 def trace_end(): - print(datetime.datetime.today(), "Copying to database...") + printdate("Copying to database...") copy_output_file(evsel_file, "selected_events") copy_output_file(machine_file, "machines") copy_output_file(thread_file, "threads") @@ -613,7 +616,7 @@ def trace_end(): if perf_db_export_calls: copy_output_file(call_file, "calls") - print(datetime.datetime.today(), "Removing intermediate files...") + printdate("Removing intermediate files...") remove_output_file(evsel_file) remove_output_file(machine_file) remove_output_file(thread_file) @@ -628,7 +631,7 @@ def trace_end(): if perf_db_export_calls: remove_output_file(call_file) os.rmdir(output_dir_name) - print(datetime.datetime.today(), "Adding primary keys") + printdate("Adding primary keys") do_query(query, 'ALTER TABLE selected_events ADD PRIMARY KEY (id)') do_query(query, 'ALTER TABLE machines ADD PRIMARY KEY (id)') do_query(query, 'ALTER TABLE threads ADD PRIMARY KEY (id)') @@ -643,7 +646,7 @@ def trace_end(): if perf_db_export_calls: do_query(query, 'ALTER TABLE calls ADD PRIMARY KEY (id)') - print(datetime.datetime.today(), "Adding foreign keys") + printdate("Adding foreign keys") do_query(query, 'ALTER TABLE threads ' 'ADD CONSTRAINT machinefk FOREIGN KEY (machine_id) REFERENCES machines (id),' 'ADD CONSTRAINT processfk FOREIGN KEY (process_id) REFERENCES threads (id)') @@ -679,8 +682,8 @@ def trace_end(): do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)') if (unhandled_count): - print(datetime.datetime.today(), "Warning: ", unhandled_count, " unhandled events") - print(datetime.datetime.today(), "Done") + printdate("Warning: ", unhandled_count, " unhandled events") + printdate("Done") def trace_unhandled(event_name, context, event_fields_dict): global unhandled_count diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index 3da338243aed..3b71902a5a21 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py @@ -65,6 +65,9 @@ perf_db_export_callchains = False def printerr(*args, **keyword_args): print(*args, file=sys.stderr, **keyword_args) +def printdate(*args, **kw_args): + print(datetime.datetime.today(), *args, sep=' ', **kw_args) + def usage(): printerr("Usage is: export-to-sqlite.py [] [] []"); printerr("where: columns 'all' or 'branches'"); @@ -105,7 +108,7 @@ def do_query_(q): return raise Exception("Query failed: " + q.lastError().text()) -print(datetime.datetime.today(), "Creating database ...") +printdate("Creating database ...") db_exists = False try: @@ -383,7 +386,7 @@ if perf_db_export_calls: call_query.prepare("INSERT INTO calls VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)") def trace_begin(): - print(datetime.datetime.today(), "Writing records...") + printdate("Writing records...") do_query(query, 'BEGIN TRANSACTION') # id == 0 means unknown. It is easier to create records for them than replace the zeroes with NULLs evsel_table(0, "unknown") @@ -402,14 +405,14 @@ unhandled_count = 0 def trace_end(): do_query(query, 'END TRANSACTION') - print(datetime.datetime.today(), "Adding indexes") + printdate("Adding indexes") if perf_db_export_calls: do_query(query, 'CREATE INDEX pcpid_idx ON calls (parent_call_path_id)') do_query(query, 'CREATE INDEX pid_idx ON calls (parent_id)') if (unhandled_count): - print(datetime.datetime.today(), "Warning: ", unhandled_count, " unhandled events") - print(datetime.datetime.today(), "Done") + printdate("Warning: ", unhandled_count, " unhandled events") + printdate("Done") def trace_unhandled(event_name, context, event_fields_dict): global unhandled_count -- cgit From df94bb44b518b1d0c9f4b2e5127441cec13ab75c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 11 Mar 2019 13:20:25 -0300 Subject: perf tools: Update x86's syscall_64.tbl, no change in tools/perf behaviour To pick the changes in 7948450d4556 ("x86/x32: use time64 versions of sigtimedwait and recvmmsg"), that doesn't cause any change in behaviour in tools/perf/ as it deals just with the x32 entries. This silences this tools/perf build warning: Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl' diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl Cc: Adrian Hunter Cc: Arnd Bergmann Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-mqpvshayeqidlulx5qpioa59@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index f0b1709a5ffb..2ae92fddb6d5 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -343,6 +343,8 @@ 332 common statx __x64_sys_statx 333 common io_pgetevents __x64_sys_io_pgetevents 334 common rseq __x64_sys_rseq +# don't use numbers 387 through 423, add new calls after the last +# 'common' entry # # x32-specific system call numbers start at 512 to avoid cache impact @@ -361,7 +363,7 @@ 520 x32 execve __x32_compat_sys_execve/ptregs 521 x32 ptrace __x32_compat_sys_ptrace 522 x32 rt_sigpending __x32_compat_sys_rt_sigpending -523 x32 rt_sigtimedwait __x32_compat_sys_rt_sigtimedwait +523 x32 rt_sigtimedwait __x32_compat_sys_rt_sigtimedwait_time64 524 x32 rt_sigqueueinfo __x32_compat_sys_rt_sigqueueinfo 525 x32 sigaltstack __x32_compat_sys_sigaltstack 526 x32 timer_create __x32_compat_sys_timer_create @@ -375,7 +377,7 @@ 534 x32 preadv __x32_compat_sys_preadv64 535 x32 pwritev __x32_compat_sys_pwritev64 536 x32 rt_tgsigqueueinfo __x32_compat_sys_rt_tgsigqueueinfo -537 x32 recvmmsg __x32_compat_sys_recvmmsg +537 x32 recvmmsg __x32_compat_sys_recvmmsg_time64 538 x32 sendmmsg __x32_compat_sys_sendmmsg 539 x32 process_vm_readv __x32_compat_sys_process_vm_readv 540 x32 process_vm_writev __x32_compat_sys_process_vm_writev -- cgit From 1a787fc5ba18ac767e635c58d06a0b46876184e3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 11 Mar 2019 13:30:08 -0300 Subject: tools headers uapi: Sync copy of asm-generic/unistd.h with the kernel sources To get the changes in: c8ce48f06503 ("asm-generic: Make time32 syscall numbers optional") Silencing these tools/perf build warnings: Warning: Kernel ABI header at 'tools/arch/arm64/include/uapi/asm/unistd.h' differs from latest version at 'arch/arm64/include/uapi/asm/unistd.h' diff -u tools/arch/arm64/include/uapi/asm/unistd.h arch/arm64/include/uapi/asm/unistd.h Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h' diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h Test built it under the ubuntu:14.04.4-x-linaro-arm64 cross build environment and looked at the syscall table at /tmp/build/perf/arch/arm64/include/generated/asm/syscalls.c, looks ok. Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Arnd Bergmann Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Kim Phillips Cc: Michael Ellerman Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Thomas Richter Link: https://lkml.kernel.org/n/tip-e4w7ngsmkq48bd6st52ty2kb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/arm64/include/uapi/asm/unistd.h | 2 + tools/include/uapi/asm-generic/unistd.h | 149 ++++++++++++++++++++++------- 2 files changed, 118 insertions(+), 33 deletions(-) diff --git a/tools/arch/arm64/include/uapi/asm/unistd.h b/tools/arch/arm64/include/uapi/asm/unistd.h index dae1584cf017..4703d218663a 100644 --- a/tools/arch/arm64/include/uapi/asm/unistd.h +++ b/tools/arch/arm64/include/uapi/asm/unistd.h @@ -17,5 +17,7 @@ #define __ARCH_WANT_RENAMEAT #define __ARCH_WANT_NEW_STAT +#define __ARCH_WANT_SET_GET_RLIMIT +#define __ARCH_WANT_TIME32_SYSCALLS #include diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index d90127298f12..12cdf611d217 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -38,8 +38,10 @@ __SYSCALL(__NR_io_destroy, sys_io_destroy) __SC_COMP(__NR_io_submit, sys_io_submit, compat_sys_io_submit) #define __NR_io_cancel 3 __SYSCALL(__NR_io_cancel, sys_io_cancel) +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_io_getevents 4 -__SC_COMP(__NR_io_getevents, sys_io_getevents, compat_sys_io_getevents) +__SC_3264(__NR_io_getevents, sys_io_getevents_time32, sys_io_getevents) +#endif /* fs/xattr.c */ #define __NR_setxattr 5 @@ -179,7 +181,7 @@ __SYSCALL(__NR_fchownat, sys_fchownat) #define __NR_fchown 55 __SYSCALL(__NR_fchown, sys_fchown) #define __NR_openat 56 -__SC_COMP(__NR_openat, sys_openat, compat_sys_openat) +__SYSCALL(__NR_openat, sys_openat) #define __NR_close 57 __SYSCALL(__NR_close, sys_close) #define __NR_vhangup 58 @@ -222,10 +224,12 @@ __SC_COMP(__NR_pwritev, sys_pwritev, compat_sys_pwritev) __SYSCALL(__NR3264_sendfile, sys_sendfile64) /* fs/select.c */ +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_pselect6 72 -__SC_COMP(__NR_pselect6, sys_pselect6, compat_sys_pselect6) +__SC_COMP_3264(__NR_pselect6, sys_pselect6_time32, sys_pselect6, compat_sys_pselect6_time32) #define __NR_ppoll 73 -__SC_COMP(__NR_ppoll, sys_ppoll, compat_sys_ppoll) +__SC_COMP_3264(__NR_ppoll, sys_ppoll_time32, sys_ppoll, compat_sys_ppoll_time32) +#endif /* fs/signalfd.c */ #define __NR_signalfd4 74 @@ -269,16 +273,20 @@ __SC_COMP(__NR_sync_file_range, sys_sync_file_range, \ /* fs/timerfd.c */ #define __NR_timerfd_create 85 __SYSCALL(__NR_timerfd_create, sys_timerfd_create) +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_timerfd_settime 86 -__SC_COMP(__NR_timerfd_settime, sys_timerfd_settime, \ - compat_sys_timerfd_settime) +__SC_3264(__NR_timerfd_settime, sys_timerfd_settime32, \ + sys_timerfd_settime) #define __NR_timerfd_gettime 87 -__SC_COMP(__NR_timerfd_gettime, sys_timerfd_gettime, \ - compat_sys_timerfd_gettime) +__SC_3264(__NR_timerfd_gettime, sys_timerfd_gettime32, \ + sys_timerfd_gettime) +#endif /* fs/utimes.c */ +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_utimensat 88 -__SC_COMP(__NR_utimensat, sys_utimensat, compat_sys_utimensat) +__SC_3264(__NR_utimensat, sys_utimensat_time32, sys_utimensat) +#endif /* kernel/acct.c */ #define __NR_acct 89 @@ -309,8 +317,10 @@ __SYSCALL(__NR_set_tid_address, sys_set_tid_address) __SYSCALL(__NR_unshare, sys_unshare) /* kernel/futex.c */ +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_futex 98 -__SC_COMP(__NR_futex, sys_futex, compat_sys_futex) +__SC_3264(__NR_futex, sys_futex_time32, sys_futex) +#endif #define __NR_set_robust_list 99 __SC_COMP(__NR_set_robust_list, sys_set_robust_list, \ compat_sys_set_robust_list) @@ -319,8 +329,10 @@ __SC_COMP(__NR_get_robust_list, sys_get_robust_list, \ compat_sys_get_robust_list) /* kernel/hrtimer.c */ +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_nanosleep 101 -__SC_COMP(__NR_nanosleep, sys_nanosleep, compat_sys_nanosleep) +__SC_3264(__NR_nanosleep, sys_nanosleep_time32, sys_nanosleep) +#endif /* kernel/itimer.c */ #define __NR_getitimer 102 @@ -341,23 +353,29 @@ __SYSCALL(__NR_delete_module, sys_delete_module) /* kernel/posix-timers.c */ #define __NR_timer_create 107 __SC_COMP(__NR_timer_create, sys_timer_create, compat_sys_timer_create) +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_timer_gettime 108 -__SC_COMP(__NR_timer_gettime, sys_timer_gettime, compat_sys_timer_gettime) +__SC_3264(__NR_timer_gettime, sys_timer_gettime32, sys_timer_gettime) +#endif #define __NR_timer_getoverrun 109 __SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun) +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_timer_settime 110 -__SC_COMP(__NR_timer_settime, sys_timer_settime, compat_sys_timer_settime) +__SC_3264(__NR_timer_settime, sys_timer_settime32, sys_timer_settime) +#endif #define __NR_timer_delete 111 __SYSCALL(__NR_timer_delete, sys_timer_delete) +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_clock_settime 112 -__SC_COMP(__NR_clock_settime, sys_clock_settime, compat_sys_clock_settime) +__SC_3264(__NR_clock_settime, sys_clock_settime32, sys_clock_settime) #define __NR_clock_gettime 113 -__SC_COMP(__NR_clock_gettime, sys_clock_gettime, compat_sys_clock_gettime) +__SC_3264(__NR_clock_gettime, sys_clock_gettime32, sys_clock_gettime) #define __NR_clock_getres 114 -__SC_COMP(__NR_clock_getres, sys_clock_getres, compat_sys_clock_getres) +__SC_3264(__NR_clock_getres, sys_clock_getres_time32, sys_clock_getres) #define __NR_clock_nanosleep 115 -__SC_COMP(__NR_clock_nanosleep, sys_clock_nanosleep, \ - compat_sys_clock_nanosleep) +__SC_3264(__NR_clock_nanosleep, sys_clock_nanosleep_time32, \ + sys_clock_nanosleep) +#endif /* kernel/printk.c */ #define __NR_syslog 116 @@ -388,9 +406,11 @@ __SYSCALL(__NR_sched_yield, sys_sched_yield) __SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max) #define __NR_sched_get_priority_min 126 __SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min) +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_sched_rr_get_interval 127 -__SC_COMP(__NR_sched_rr_get_interval, sys_sched_rr_get_interval, \ - compat_sys_sched_rr_get_interval) +__SC_3264(__NR_sched_rr_get_interval, sys_sched_rr_get_interval_time32, \ + sys_sched_rr_get_interval) +#endif /* kernel/signal.c */ #define __NR_restart_syscall 128 @@ -411,9 +431,11 @@ __SC_COMP(__NR_rt_sigaction, sys_rt_sigaction, compat_sys_rt_sigaction) __SC_COMP(__NR_rt_sigprocmask, sys_rt_sigprocmask, compat_sys_rt_sigprocmask) #define __NR_rt_sigpending 136 __SC_COMP(__NR_rt_sigpending, sys_rt_sigpending, compat_sys_rt_sigpending) +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_rt_sigtimedwait 137 -__SC_COMP(__NR_rt_sigtimedwait, sys_rt_sigtimedwait, \ - compat_sys_rt_sigtimedwait) +__SC_COMP_3264(__NR_rt_sigtimedwait, sys_rt_sigtimedwait_time32, \ + sys_rt_sigtimedwait, compat_sys_rt_sigtimedwait_time32) +#endif #define __NR_rt_sigqueueinfo 138 __SC_COMP(__NR_rt_sigqueueinfo, sys_rt_sigqueueinfo, \ compat_sys_rt_sigqueueinfo) @@ -467,10 +489,15 @@ __SYSCALL(__NR_uname, sys_newuname) __SYSCALL(__NR_sethostname, sys_sethostname) #define __NR_setdomainname 162 __SYSCALL(__NR_setdomainname, sys_setdomainname) + +#ifdef __ARCH_WANT_SET_GET_RLIMIT +/* getrlimit and setrlimit are superseded with prlimit64 */ #define __NR_getrlimit 163 __SC_COMP(__NR_getrlimit, sys_getrlimit, compat_sys_getrlimit) #define __NR_setrlimit 164 __SC_COMP(__NR_setrlimit, sys_setrlimit, compat_sys_setrlimit) +#endif + #define __NR_getrusage 165 __SC_COMP(__NR_getrusage, sys_getrusage, compat_sys_getrusage) #define __NR_umask 166 @@ -481,12 +508,14 @@ __SYSCALL(__NR_prctl, sys_prctl) __SYSCALL(__NR_getcpu, sys_getcpu) /* kernel/time.c */ +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_gettimeofday 169 __SC_COMP(__NR_gettimeofday, sys_gettimeofday, compat_sys_gettimeofday) #define __NR_settimeofday 170 __SC_COMP(__NR_settimeofday, sys_settimeofday, compat_sys_settimeofday) #define __NR_adjtimex 171 -__SC_COMP(__NR_adjtimex, sys_adjtimex, compat_sys_adjtimex) +__SC_3264(__NR_adjtimex, sys_adjtimex_time32, sys_adjtimex) +#endif /* kernel/timer.c */ #define __NR_getpid 172 @@ -511,11 +540,13 @@ __SC_COMP(__NR_sysinfo, sys_sysinfo, compat_sys_sysinfo) __SC_COMP(__NR_mq_open, sys_mq_open, compat_sys_mq_open) #define __NR_mq_unlink 181 __SYSCALL(__NR_mq_unlink, sys_mq_unlink) +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_mq_timedsend 182 -__SC_COMP(__NR_mq_timedsend, sys_mq_timedsend, compat_sys_mq_timedsend) +__SC_3264(__NR_mq_timedsend, sys_mq_timedsend_time32, sys_mq_timedsend) #define __NR_mq_timedreceive 183 -__SC_COMP(__NR_mq_timedreceive, sys_mq_timedreceive, \ - compat_sys_mq_timedreceive) +__SC_3264(__NR_mq_timedreceive, sys_mq_timedreceive_time32, \ + sys_mq_timedreceive) +#endif #define __NR_mq_notify 184 __SC_COMP(__NR_mq_notify, sys_mq_notify, compat_sys_mq_notify) #define __NR_mq_getsetattr 185 @@ -536,8 +567,10 @@ __SC_COMP(__NR_msgsnd, sys_msgsnd, compat_sys_msgsnd) __SYSCALL(__NR_semget, sys_semget) #define __NR_semctl 191 __SC_COMP(__NR_semctl, sys_semctl, compat_sys_semctl) +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_semtimedop 192 -__SC_COMP(__NR_semtimedop, sys_semtimedop, compat_sys_semtimedop) +__SC_COMP(__NR_semtimedop, sys_semtimedop, sys_semtimedop_time32) +#endif #define __NR_semop 193 __SYSCALL(__NR_semop, sys_semop) @@ -658,8 +691,10 @@ __SC_COMP(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo, \ __SYSCALL(__NR_perf_event_open, sys_perf_event_open) #define __NR_accept4 242 __SYSCALL(__NR_accept4, sys_accept4) +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_recvmmsg 243 -__SC_COMP(__NR_recvmmsg, sys_recvmmsg, compat_sys_recvmmsg) +__SC_COMP_3264(__NR_recvmmsg, sys_recvmmsg_time32, sys_recvmmsg, compat_sys_recvmmsg_time32) +#endif /* * Architectures may provide up to 16 syscalls of their own @@ -667,8 +702,10 @@ __SC_COMP(__NR_recvmmsg, sys_recvmmsg, compat_sys_recvmmsg) */ #define __NR_arch_specific_syscall 244 +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_wait4 260 __SC_COMP(__NR_wait4, sys_wait4, compat_sys_wait4) +#endif #define __NR_prlimit64 261 __SYSCALL(__NR_prlimit64, sys_prlimit64) #define __NR_fanotify_init 262 @@ -678,10 +715,11 @@ __SYSCALL(__NR_fanotify_mark, sys_fanotify_mark) #define __NR_name_to_handle_at 264 __SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) #define __NR_open_by_handle_at 265 -__SC_COMP(__NR_open_by_handle_at, sys_open_by_handle_at, \ - compat_sys_open_by_handle_at) +__SYSCALL(__NR_open_by_handle_at, sys_open_by_handle_at) +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_clock_adjtime 266 -__SC_COMP(__NR_clock_adjtime, sys_clock_adjtime, compat_sys_clock_adjtime) +__SC_3264(__NR_clock_adjtime, sys_clock_adjtime32, sys_clock_adjtime) +#endif #define __NR_syncfs 267 __SYSCALL(__NR_syncfs, sys_syncfs) #define __NR_setns 268 @@ -734,15 +772,60 @@ __SYSCALL(__NR_pkey_alloc, sys_pkey_alloc) __SYSCALL(__NR_pkey_free, sys_pkey_free) #define __NR_statx 291 __SYSCALL(__NR_statx, sys_statx) +#if defined(__ARCH_WANT_TIME32_SYSCALLS) || __BITS_PER_LONG != 32 #define __NR_io_pgetevents 292 -__SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents) +__SC_COMP_3264(__NR_io_pgetevents, sys_io_pgetevents_time32, sys_io_pgetevents, compat_sys_io_pgetevents) +#endif #define __NR_rseq 293 __SYSCALL(__NR_rseq, sys_rseq) #define __NR_kexec_file_load 294 __SYSCALL(__NR_kexec_file_load, sys_kexec_file_load) +/* 295 through 402 are unassigned to sync up with generic numbers, don't use */ +#if __BITS_PER_LONG == 32 +#define __NR_clock_gettime64 403 +__SYSCALL(__NR_clock_gettime64, sys_clock_gettime) +#define __NR_clock_settime64 404 +__SYSCALL(__NR_clock_settime64, sys_clock_settime) +#define __NR_clock_adjtime64 405 +__SYSCALL(__NR_clock_adjtime64, sys_clock_adjtime) +#define __NR_clock_getres_time64 406 +__SYSCALL(__NR_clock_getres_time64, sys_clock_getres) +#define __NR_clock_nanosleep_time64 407 +__SYSCALL(__NR_clock_nanosleep_time64, sys_clock_nanosleep) +#define __NR_timer_gettime64 408 +__SYSCALL(__NR_timer_gettime64, sys_timer_gettime) +#define __NR_timer_settime64 409 +__SYSCALL(__NR_timer_settime64, sys_timer_settime) +#define __NR_timerfd_gettime64 410 +__SYSCALL(__NR_timerfd_gettime64, sys_timerfd_gettime) +#define __NR_timerfd_settime64 411 +__SYSCALL(__NR_timerfd_settime64, sys_timerfd_settime) +#define __NR_utimensat_time64 412 +__SYSCALL(__NR_utimensat_time64, sys_utimensat) +#define __NR_pselect6_time64 413 +__SC_COMP(__NR_pselect6_time64, sys_pselect6, compat_sys_pselect6_time64) +#define __NR_ppoll_time64 414 +__SC_COMP(__NR_ppoll_time64, sys_ppoll, compat_sys_ppoll_time64) +#define __NR_io_pgetevents_time64 416 +__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents) +#define __NR_recvmmsg_time64 417 +__SC_COMP(__NR_recvmmsg_time64, sys_recvmmsg, compat_sys_recvmmsg_time64) +#define __NR_mq_timedsend_time64 418 +__SYSCALL(__NR_mq_timedsend_time64, sys_mq_timedsend) +#define __NR_mq_timedreceive_time64 419 +__SYSCALL(__NR_mq_timedreceive_time64, sys_mq_timedreceive) +#define __NR_semtimedop_time64 420 +__SYSCALL(__NR_semtimedop_time64, sys_semtimedop) +#define __NR_rt_sigtimedwait_time64 421 +__SC_COMP(__NR_rt_sigtimedwait_time64, sys_rt_sigtimedwait, compat_sys_rt_sigtimedwait_time64) +#define __NR_futex_time64 422 +__SYSCALL(__NR_futex_time64, sys_futex) +#define __NR_sched_rr_get_interval_time64 423 +__SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval) +#endif #undef __NR_syscalls -#define __NR_syscalls 295 +#define __NR_syscalls 424 /* * 32 bit systems traditionally used different -- cgit From 2fb71043e8894ca78258f7458a2db2eb3a142a22 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 11 Mar 2019 13:39:48 -0300 Subject: tools headers uapi: Update linux/in.h copy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To get the changes in: 4effd28c1245 ("bridge: join all-snoopers multicast address") That do not generate any changes in tools/ use of this file. Silences this tools/perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/in.h' differs from latest version at 'include/uapi/linux/in.h' diff -u tools/include/uapi/linux/in.h include/uapi/linux/in.h Cc: Adrian Hunter Cc: David S. Miller Cc: Jiri Olsa Cc: Linus Lüssing Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-ifpl634035266ho6wxuqgo81@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/in.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index a55cb8b10165..e7ad9d350a28 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -292,10 +292,11 @@ struct sockaddr_in { #define IN_LOOPBACK(a) ((((long int) (a)) & 0xff000000) == 0x7f000000) /* Defines for Multicast INADDR */ -#define INADDR_UNSPEC_GROUP 0xe0000000U /* 224.0.0.0 */ -#define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */ -#define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */ -#define INADDR_MAX_LOCAL_GROUP 0xe00000ffU /* 224.0.0.255 */ +#define INADDR_UNSPEC_GROUP 0xe0000000U /* 224.0.0.0 */ +#define INADDR_ALLHOSTS_GROUP 0xe0000001U /* 224.0.0.1 */ +#define INADDR_ALLRTRS_GROUP 0xe0000002U /* 224.0.0.2 */ +#define INADDR_ALLSNOOPERS_GROUP 0xe000006aU /* 224.0.0.106 */ +#define INADDR_MAX_LOCAL_GROUP 0xe00000ffU /* 224.0.0.255 */ #endif /* contains the htonl type stuff.. */ -- cgit From e87e548126cdc66fd4f194b38b59f351b6e5d3e8 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 11 Mar 2019 07:44:52 -0700 Subject: perf script: Filter COMM/FORK/.. events by CPU The --cpu option only filtered samples. Filter other perf events, such as COMM, FORK, SWITCH by the CPU too. Reported-by: Jiri Olsa Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20190311144502.15423-2-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 71 ++++++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 24 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 111787e83784..b695b20ffc8a 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1933,6 +1933,13 @@ static int cleanup_scripting(void) return scripting_ops ? scripting_ops->stop_script() : 0; } +static bool filter_cpu(struct perf_sample *sample) +{ + if (cpu_list) + return !test_bit(sample->cpu, cpu_bitmap); + return false; +} + static int process_sample_event(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -1967,7 +1974,7 @@ static int process_sample_event(struct perf_tool *tool, if (al.filtered) goto out_put; - if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) + if (filter_cpu(sample)) goto out_put; if (scripting_ops) @@ -2052,9 +2059,11 @@ static int process_comm_event(struct perf_tool *tool, sample->tid = event->comm.tid; sample->pid = event->comm.pid; } - perf_sample__fprintf_start(sample, thread, evsel, + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, PERF_RECORD_COMM, stdout); - perf_event__fprintf(event, stdout); + perf_event__fprintf(event, stdout); + } ret = 0; out: thread__put(thread); @@ -2088,9 +2097,11 @@ static int process_namespaces_event(struct perf_tool *tool, sample->tid = event->namespaces.tid; sample->pid = event->namespaces.pid; } - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_NAMESPACES, stdout); - perf_event__fprintf(event, stdout); + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_NAMESPACES, stdout); + perf_event__fprintf(event, stdout); + } ret = 0; out: thread__put(thread); @@ -2122,9 +2133,11 @@ static int process_fork_event(struct perf_tool *tool, sample->tid = event->fork.tid; sample->pid = event->fork.pid; } - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_FORK, stdout); - perf_event__fprintf(event, stdout); + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_FORK, stdout); + perf_event__fprintf(event, stdout); + } thread__put(thread); return 0; @@ -2152,9 +2165,11 @@ static int process_exit_event(struct perf_tool *tool, sample->tid = event->fork.tid; sample->pid = event->fork.pid; } - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_EXIT, stdout); - perf_event__fprintf(event, stdout); + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_EXIT, stdout); + perf_event__fprintf(event, stdout); + } if (perf_event__process_exit(tool, event, sample, machine) < 0) err = -1; @@ -2188,9 +2203,11 @@ static int process_mmap_event(struct perf_tool *tool, sample->tid = event->mmap.tid; sample->pid = event->mmap.pid; } - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_MMAP, stdout); - perf_event__fprintf(event, stdout); + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_MMAP, stdout); + perf_event__fprintf(event, stdout); + } thread__put(thread); return 0; } @@ -2220,9 +2237,11 @@ static int process_mmap2_event(struct perf_tool *tool, sample->tid = event->mmap2.tid; sample->pid = event->mmap2.pid; } - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_MMAP2, stdout); - perf_event__fprintf(event, stdout); + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_MMAP2, stdout); + perf_event__fprintf(event, stdout); + } thread__put(thread); return 0; } @@ -2247,9 +2266,11 @@ static int process_switch_event(struct perf_tool *tool, return -1; } - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_SWITCH, stdout); - perf_event__fprintf(event, stdout); + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_SWITCH, stdout); + perf_event__fprintf(event, stdout); + } thread__put(thread); return 0; } @@ -2270,9 +2291,11 @@ process_lost_event(struct perf_tool *tool, if (thread == NULL) return -1; - perf_sample__fprintf_start(sample, thread, evsel, - PERF_RECORD_LOST, stdout); - perf_event__fprintf(event, stdout); + if (!filter_cpu(sample)) { + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_LOST, stdout); + perf_event__fprintf(event, stdout); + } thread__put(thread); return 0; } -- cgit From 3723908d05834c76fd5cc9ecd17b0851342e1df4 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 11 Mar 2019 07:44:54 -0700 Subject: perf report: Support time sort key Add a time sort key to perf report to display samples for different time quantums separately. This allows easier analysis of workloads that change over time, and also will allow looking at the context of samples. % perf record ... % perf report --sort time,overhead,symbol --time-quantum 1ms --stdio ... 0.67% 277061.87300 [.] _dl_start 0.50% 277061.87300 [.] f1 0.50% 277061.87300 [.] f2 0.33% 277061.87300 [.] main 0.29% 277061.87300 [.] _dl_lookup_symbol_x 0.29% 277061.87300 [.] dl_main 0.29% 277061.87300 [.] do_lookup_x 0.17% 277061.87300 [.] _dl_debug_initialize 0.17% 277061.87300 [.] _dl_init_paths 0.08% 277061.87300 [.] check_match 0.04% 277061.87300 [.] _dl_count_modids 1.33% 277061.87400 [.] f1 1.33% 277061.87400 [.] f2 1.33% 277061.87400 [.] main 1.17% 277061.87500 [.] main 1.08% 277061.87500 [.] f1 1.08% 277061.87500 [.] f2 1.00% 277061.87600 [.] main 0.83% 277061.87600 [.] f1 0.83% 277061.87600 [.] f2 1.00% 277061.87700 [.] main Committer notes: Rename 'time' argument to hist_time() to htime to overcome this in older distros: cc1: warnings being treated as errors util/hist.c: In function 'hist_time': util/hist.c:251: error: declaration of 'time' shadows a global declaration /usr/include/time.h:186: error: shadowed declaration is here Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20190311144502.15423-4-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 2 ++ tools/perf/util/hist.c | 11 +++++++++ tools/perf/util/hist.h | 1 + tools/perf/util/sort.c | 39 ++++++++++++++++++++++++++++++++ tools/perf/util/sort.h | 2 ++ 5 files changed, 55 insertions(+) diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 9ec1702bccdd..546d87221ad8 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -105,6 +105,8 @@ OPTIONS guest machine - sample: Number of sample - period: Raw number of event count of sample + - time: Separate the samples by time stamp with the resolution specified by + --time-quantum (default 100ms). Specify with overhead and before it. By default, comm, dso and symbol keys are used. (i.e. --sort comm,dso,symbol) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index f9eb95bf3938..34c0f00c68d1 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -19,6 +19,7 @@ #include #include #include +#include static bool hists__filter_entry_by_dso(struct hists *hists, struct hist_entry *he); @@ -192,6 +193,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_MEM_LVL, 21 + 3); hists__new_col_len(hists, HISTC_LOCAL_WEIGHT, 12); hists__new_col_len(hists, HISTC_GLOBAL_WEIGHT, 12); + hists__new_col_len(hists, HISTC_TIME, 12); if (h->srcline) { len = MAX(strlen(h->srcline), strlen(sort_srcline.se_header)); @@ -246,6 +248,14 @@ static void he_stat__add_cpumode_period(struct he_stat *he_stat, } } +static long hist_time(unsigned long htime) +{ + unsigned long time_quantum = symbol_conf.time_quantum; + if (time_quantum) + return (htime / time_quantum) * time_quantum; + return htime; +} + static void he_stat__add_period(struct he_stat *he_stat, u64 period, u64 weight) { @@ -635,6 +645,7 @@ __hists__add_entry(struct hists *hists, .raw_data = sample->raw_data, .raw_size = sample->raw_size, .ops = ops, + .time = hist_time(sample->time), }, *he = hists__findnew_entry(hists, &entry, al, sample_self); if (!hists->has_callchains && he && he->callchain_size != 0) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 4af27fbab24f..6279eca56409 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -31,6 +31,7 @@ enum hist_filter { enum hist_column { HISTC_SYMBOL, + HISTC_TIME, HISTC_DSO, HISTC_THREAD, HISTC_COMM, diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index d2299e912e59..bdd30cab51cb 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "sort.h" #include "hist.h" #include "comm.h" @@ -15,6 +16,7 @@ #include #include "mem-events.h" #include "annotate.h" +#include "time-utils.h" #include regex_t parent_regex; @@ -654,6 +656,42 @@ struct sort_entry sort_socket = { .se_width_idx = HISTC_SOCKET, }; +/* --sort time */ + +static int64_t +sort__time_cmp(struct hist_entry *left, struct hist_entry *right) +{ + return right->time - left->time; +} + +static int hist_entry__time_snprintf(struct hist_entry *he, char *bf, + size_t size, unsigned int width) +{ + unsigned long secs; + unsigned long long nsecs; + char he_time[32]; + + nsecs = he->time; + secs = nsecs / NSEC_PER_SEC; + nsecs -= secs * NSEC_PER_SEC; + + if (symbol_conf.nanosecs) + snprintf(he_time, sizeof he_time, "%5lu.%09llu: ", + secs, nsecs); + else + timestamp__scnprintf_usec(he->time, he_time, + sizeof(he_time)); + + return repsep_snprintf(bf, size, "%-.*s", width, he_time); +} + +struct sort_entry sort_time = { + .se_header = "Time", + .se_cmp = sort__time_cmp, + .se_snprintf = hist_entry__time_snprintf, + .se_width_idx = HISTC_TIME, +}; + /* --sort trace */ static char *get_trace_output(struct hist_entry *he) @@ -1634,6 +1672,7 @@ static struct sort_dimension common_sort_dimensions[] = { DIM(SORT_DSO_SIZE, "dso_size", sort_dso_size), DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id), DIM(SORT_SYM_IPC_NULL, "ipc_null", sort_sym_ipc_null), + DIM(SORT_TIME, "time", sort_time), }; #undef DIM diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 2fbee0b1011c..19dceb7f6145 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -135,6 +135,7 @@ struct hist_entry { char *srcfile; struct symbol *parent; struct branch_info *branch_info; + long time; struct hists *hists; struct mem_info *mem_info; void *raw_data; @@ -231,6 +232,7 @@ enum sort_type { SORT_DSO_SIZE, SORT_CGROUP_ID, SORT_SYM_IPC_NULL, + SORT_TIME, /* branch stack specific sort keys */ __SORT_BRANCH_STACK, -- cgit From 1d6c49df74b0706af13a7d707638f0db374eaf88 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 11 Mar 2019 07:44:56 -0700 Subject: perf report: Support running scripts for current time range When using the time sort key, add new context menus to run scripts for only the currently selected time range. Compute the correct range for the selection add pass it as the --time option to perf script. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20190311144502.15423-6-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/hists.c | 83 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 72 insertions(+), 11 deletions(-) diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index aef800d97ea1..f98aeac607dd 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "../../util/callchain.h" #include "../../util/evsel.h" @@ -30,6 +31,7 @@ #include "srcline.h" #include "string2.h" #include "units.h" +#include "time-utils.h" #include "sane_ctype.h" @@ -2338,6 +2340,7 @@ close_file_and_continue: } struct popup_action { + unsigned long time; struct thread *thread; struct map_symbol ms; int socket; @@ -2527,36 +2530,64 @@ static int do_run_script(struct hist_browser *browser __maybe_unused, struct popup_action *act) { - char script_opt[64]; - memset(script_opt, 0, sizeof(script_opt)); + char *script_opt; + int len; + int n = 0; + + len = 100; + if (act->thread) + len += strlen(thread__comm_str(act->thread)); + else if (act->ms.sym) + len += strlen(act->ms.sym->name); + script_opt = malloc(len); + if (!script_opt) + return -1; + script_opt[0] = 0; if (act->thread) { - scnprintf(script_opt, sizeof(script_opt), " -c %s ", + n = scnprintf(script_opt, len, " -c %s ", thread__comm_str(act->thread)); } else if (act->ms.sym) { - scnprintf(script_opt, sizeof(script_opt), " -S %s ", + n = scnprintf(script_opt, len, " -S %s ", act->ms.sym->name); } + if (act->time) { + char start[32], end[32]; + unsigned long starttime = act->time; + unsigned long endtime = act->time + symbol_conf.time_quantum; + + if (starttime == endtime) { /* Display 1ms as fallback */ + starttime -= 1*NSEC_PER_MSEC; + endtime += 1*NSEC_PER_MSEC; + } + timestamp__scnprintf_usec(starttime, start, sizeof start); + timestamp__scnprintf_usec(endtime, end, sizeof end); + n += snprintf(script_opt + n, len - n, " --time %s,%s", start, end); + } + script_browse(script_opt); + free(script_opt); return 0; } static int -add_script_opt(struct hist_browser *browser __maybe_unused, +add_script_opt_2(struct hist_browser *browser __maybe_unused, struct popup_action *act, char **optstr, - struct thread *thread, struct symbol *sym) + struct thread *thread, struct symbol *sym, + const char *tstr) { + if (thread) { - if (asprintf(optstr, "Run scripts for samples of thread [%s]", - thread__comm_str(thread)) < 0) + if (asprintf(optstr, "Run scripts for samples of thread [%s]%s", + thread__comm_str(thread), tstr) < 0) return 0; } else if (sym) { - if (asprintf(optstr, "Run scripts for samples of symbol [%s]", - sym->name) < 0) + if (asprintf(optstr, "Run scripts for samples of symbol [%s]%s", + sym->name, tstr) < 0) return 0; } else { - if (asprintf(optstr, "Run scripts for all samples") < 0) + if (asprintf(optstr, "Run scripts for all samples%s", tstr) < 0) return 0; } @@ -2566,6 +2597,36 @@ add_script_opt(struct hist_browser *browser __maybe_unused, return 1; } +static int +add_script_opt(struct hist_browser *browser, + struct popup_action *act, char **optstr, + struct thread *thread, struct symbol *sym) +{ + int n, j; + struct hist_entry *he; + + n = add_script_opt_2(browser, act, optstr, thread, sym, ""); + + he = hist_browser__selected_entry(browser); + if (sort_order && strstr(sort_order, "time")) { + char tstr[128]; + + optstr++; + act++; + j = sprintf(tstr, " in "); + j += timestamp__scnprintf_usec(he->time, tstr + j, + sizeof tstr - j); + j += sprintf(tstr + j, "-"); + timestamp__scnprintf_usec(he->time + symbol_conf.time_quantum, + tstr + j, + sizeof tstr - j); + n += add_script_opt_2(browser, act, optstr, thread, sym, + tstr); + act->time = he->time; + } + return n; +} + static int do_switch_data(struct hist_browser *browser __maybe_unused, struct popup_action *act __maybe_unused) -- cgit From 6f3da20e151f4121548cf598730ae0f9559ae45d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 11 Mar 2019 07:44:57 -0700 Subject: perf report: Support builtin perf script in scripts menu The scripts menu traditionally only showed custom perf scripts. Allow to run standard perf script with useful default options too. - Normal perf script - perf script with assembler (needs xed installed) - perf script with source code output (needs debuginfo) - perf script with custom arguments Then we automatically select the right options to display the information in the perf.data file. For example with -b display branch contexts. It's not easily possible to check for xed's existence in advance. perf script usually gives sensible error messages when it's not available. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20190311144502.15423-7-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 2 +- tools/perf/ui/browsers/hists.c | 23 ++++--- tools/perf/ui/browsers/scripts.c | 127 ++++++++++++++++++++++++++++++-------- tools/perf/util/hist.h | 8 ++- 4 files changed, 120 insertions(+), 40 deletions(-) diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 35bdfd8b1e71..98d934a36d86 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -750,7 +750,7 @@ static int annotate_browser__run(struct annotate_browser *browser, continue; case 'r': { - script_browse(NULL); + script_browse(NULL, NULL); continue; } case 'k': diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index f98aeac607dd..fb4430f8982c 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2344,6 +2344,7 @@ struct popup_action { struct thread *thread; struct map_symbol ms; int socket; + struct perf_evsel *evsel; int (*fn)(struct hist_browser *browser, struct popup_action *act); }; @@ -2566,7 +2567,7 @@ do_run_script(struct hist_browser *browser __maybe_unused, n += snprintf(script_opt + n, len - n, " --time %s,%s", start, end); } - script_browse(script_opt); + script_browse(script_opt, act->evsel); free(script_opt); return 0; } @@ -2575,7 +2576,7 @@ static int add_script_opt_2(struct hist_browser *browser __maybe_unused, struct popup_action *act, char **optstr, struct thread *thread, struct symbol *sym, - const char *tstr) + struct perf_evsel *evsel, const char *tstr) { if (thread) { @@ -2593,6 +2594,7 @@ add_script_opt_2(struct hist_browser *browser __maybe_unused, act->thread = thread; act->ms.sym = sym; + act->evsel = evsel; act->fn = do_run_script; return 1; } @@ -2600,12 +2602,13 @@ add_script_opt_2(struct hist_browser *browser __maybe_unused, static int add_script_opt(struct hist_browser *browser, struct popup_action *act, char **optstr, - struct thread *thread, struct symbol *sym) + struct thread *thread, struct symbol *sym, + struct perf_evsel *evsel) { int n, j; struct hist_entry *he; - n = add_script_opt_2(browser, act, optstr, thread, sym, ""); + n = add_script_opt_2(browser, act, optstr, thread, sym, evsel, ""); he = hist_browser__selected_entry(browser); if (sort_order && strstr(sort_order, "time")) { @@ -2618,10 +2621,9 @@ add_script_opt(struct hist_browser *browser, sizeof tstr - j); j += sprintf(tstr + j, "-"); timestamp__scnprintf_usec(he->time + symbol_conf.time_quantum, - tstr + j, - sizeof tstr - j); + tstr + j, sizeof tstr - j); n += add_script_opt_2(browser, act, optstr, thread, sym, - tstr); + evsel, tstr); act->time = he->time; } return n; @@ -3092,7 +3094,7 @@ skip_annotation: nr_options += add_script_opt(browser, &actions[nr_options], &options[nr_options], - thread, NULL); + thread, NULL, evsel); } /* * Note that browser->selection != NULL @@ -3107,11 +3109,12 @@ skip_annotation: nr_options += add_script_opt(browser, &actions[nr_options], &options[nr_options], - NULL, browser->selection->sym); + NULL, browser->selection->sym, + evsel); } } nr_options += add_script_opt(browser, &actions[nr_options], - &options[nr_options], NULL, NULL); + &options[nr_options], NULL, NULL, evsel); nr_options += add_switch_opt(browser, &actions[nr_options], &options[nr_options]); skip_scripting: diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c index 7f36630694bf..9e5f87558af6 100644 --- a/tools/perf/ui/browsers/scripts.c +++ b/tools/perf/ui/browsers/scripts.c @@ -17,36 +17,111 @@ */ #define SCRIPT_FULLPATH_LEN 256 +struct script_config { + const char **names; + char **paths; + int index; + const char *perf; + char extra_format[256]; +}; + +void attr_to_script(char *extra_format, struct perf_event_attr *attr) +{ + extra_format[0] = 0; + if (attr->read_format & PERF_FORMAT_GROUP) + strcat(extra_format, " -F +metric"); + if (attr->sample_type & PERF_SAMPLE_BRANCH_STACK) + strcat(extra_format, " -F +brstackinsn --xed"); + if (attr->sample_type & PERF_SAMPLE_REGS_INTR) + strcat(extra_format, " -F +iregs"); + if (attr->sample_type & PERF_SAMPLE_REGS_USER) + strcat(extra_format, " -F +uregs"); + if (attr->sample_type & PERF_SAMPLE_PHYS_ADDR) + strcat(extra_format, " -F +phys_addr"); +} + +static int add_script_option(const char *name, const char *opt, + struct script_config *c) +{ + c->names[c->index] = name; + if (asprintf(&c->paths[c->index], + "%s script %s -F +metric %s %s", + c->perf, opt, symbol_conf.inline_name ? " --inline" : "", + c->extra_format) < 0) + return -1; + c->index++; + return 0; +} + /* * When success, will copy the full path of the selected script * into the buffer pointed by script_name, and return 0. * Return -1 on failure. */ -static int list_scripts(char *script_name) +static int list_scripts(char *script_name, bool *custom, + struct perf_evsel *evsel) { - char *buf, *names[SCRIPT_MAX_NO], *paths[SCRIPT_MAX_NO]; - int i, num, choice, ret = -1; + char *buf, *paths[SCRIPT_MAX_NO], *names[SCRIPT_MAX_NO]; + int i, num, choice; + int ret = 0; + int max_std, custom_perf; + char pbuf[256]; + const char *perf = perf_exe(pbuf, sizeof pbuf); + struct script_config scriptc = { + .names = (const char **)names, + .paths = paths, + .perf = perf + }; + + script_name[0] = 0; /* Preset the script name to SCRIPT_NAMELEN */ buf = malloc(SCRIPT_MAX_NO * (SCRIPT_NAMELEN + SCRIPT_FULLPATH_LEN)); if (!buf) - return ret; + return -1; + + if (evsel) + attr_to_script(scriptc.extra_format, &evsel->attr); + add_script_option("Show individual samples", "", &scriptc); + add_script_option("Show individual samples with assembler", "-F +insn --xed", + &scriptc); + add_script_option("Show individual samples with source", "-F +srcline,+srccode", + &scriptc); + custom_perf = scriptc.index; + add_script_option("Show samples with custom perf script arguments", "", &scriptc); + i = scriptc.index; + max_std = i; - for (i = 0; i < SCRIPT_MAX_NO; i++) { - names[i] = buf + i * (SCRIPT_NAMELEN + SCRIPT_FULLPATH_LEN); + for (; i < SCRIPT_MAX_NO; i++) { + names[i] = buf + (i - max_std) * (SCRIPT_NAMELEN + SCRIPT_FULLPATH_LEN); paths[i] = names[i] + SCRIPT_NAMELEN; } - num = find_scripts(names, paths); - if (num > 0) { - choice = ui__popup_menu(num, names); - if (choice < num && choice >= 0) { - strcpy(script_name, paths[choice]); - ret = 0; - } + num = find_scripts(names + max_std, paths + max_std); + if (num < 0) + num = 0; + choice = ui__popup_menu(num + max_std, (char * const *)names); + if (choice < 0) { + ret = -1; + goto out; } + if (choice == custom_perf) { + char script_args[50]; + int key = ui_browser__input_window("perf script command", + "Enter perf script command line (without perf script prefix)", + script_args, "", 0); + if (key != K_ENTER) + return -1; + sprintf(script_name, "%s script %s", perf, script_args); + } else if (choice < num + max_std) { + strcpy(script_name, paths[choice]); + } + *custom = choice >= max_std; +out: free(buf); + for (i = 0; i < max_std; i++) + free(paths[i]); return ret; } @@ -66,27 +141,25 @@ static void run_script(char *cmd) SLsmg_refresh(); } -int script_browse(const char *script_opt) +int script_browse(const char *script_opt, struct perf_evsel *evsel) { - char cmd[SCRIPT_FULLPATH_LEN*2], script_name[SCRIPT_FULLPATH_LEN]; + char *cmd, script_name[SCRIPT_FULLPATH_LEN]; + bool custom = false; memset(script_name, 0, SCRIPT_FULLPATH_LEN); - if (list_scripts(script_name)) + if (list_scripts(script_name, &custom, evsel)) return -1; - sprintf(cmd, "perf script -s %s ", script_name); - - if (script_opt) - strcat(cmd, script_opt); - - if (input_name) { - strcat(cmd, " -i "); - strcat(cmd, input_name); - } - - strcat(cmd, " 2>&1 | less"); + if (asprintf(&cmd, "%s%s %s %s%s 2>&1 | less", + custom ? "perf script -s " : "", + script_name, + script_opt ? script_opt : "", + input_name ? "-i " : "", + input_name ? input_name : "") < 0) + return -1; run_script(cmd); + free(cmd); return 0; } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 6279eca56409..2113a6639cea 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -436,6 +436,8 @@ struct annotation_options; #ifdef HAVE_SLANG_SUPPORT #include "../ui/keysyms.h" +void attr_to_script(char *buf, struct perf_event_attr *attr); + int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel, struct hist_browser_timer *hbt, struct annotation_options *annotation_opts); @@ -450,7 +452,8 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, struct perf_env *env, bool warn_lost_event, struct annotation_options *annotation_options); -int script_browse(const char *script_opt); + +int script_browse(const char *script_opt, struct perf_evsel *evsel); #else static inline int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused, @@ -479,7 +482,8 @@ static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused, return 0; } -static inline int script_browse(const char *script_opt __maybe_unused) +static inline int script_browse(const char *script_opt __maybe_unused, + struct perf_evsel *evsel __maybe_unused) { return 0; } -- cgit From 4968ac8fb7c378e2bc40b7e9bd97768fa8c7aa32 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 11 Mar 2019 07:44:58 -0700 Subject: perf report: Implement browsing of individual samples Now 'perf report' can show whole time periods with 'perf script', but the user still has to find individual samples of interest manually. It would be expensive and complicated to search for the right samples in the whole perf file. Typically users only need to look at a small number of samples for useful analysis. Also the full scripts tend to show samples of all CPUs and all threads mixed up, which can be very confusing on larger systems. Add a new --samples option to save a small random number of samples per hist entry. Use a reservoir sample technique to select a representatve number of samples. Then allow browsing the samples using 'perf script' as part of the hist entry context menu. This automatically adds the right filters, so only the thread or cpu of the sample is displayed. Then we use less' search functionality to directly jump the to the time stamp of the selected sample. It uses different menus for assembler and source display. Assembler needs xed installed and source needs debuginfo. Currently it only supports as many samples as fit on the screen due to some limitations in the slang ui code. Signed-off-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20190311174605.GA29294@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 6 ++ tools/perf/Documentation/perf-report.txt | 4 ++ tools/perf/builtin-report.c | 2 + tools/perf/ui/browsers/Build | 1 + tools/perf/ui/browsers/hists.c | 47 ++++++++++++++++ tools/perf/ui/browsers/res_sample.c | 94 ++++++++++++++++++++++++++++++++ tools/perf/ui/browsers/scripts.c | 2 +- tools/perf/util/hist.c | 39 +++++++++++++ tools/perf/util/hist.h | 22 ++++++++ tools/perf/util/sort.h | 8 +++ tools/perf/util/symbol.c | 1 + tools/perf/util/symbol_conf.h | 1 + 12 files changed, 226 insertions(+), 1 deletion(-) create mode 100644 tools/perf/ui/browsers/res_sample.c diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 86f3dcc15f83..2d0fb7613134 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -584,6 +584,12 @@ llvm.*:: llvm.opts:: Options passed to llc. +samples.*:: + + samples.context:: + Define how many ns worth of time to show + around samples in perf report sample context browser. + SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 546d87221ad8..f441baa794ce 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -461,6 +461,10 @@ include::itrace.txt[] --socket-filter:: Only report the samples on the processor socket that match with this filter +--samples=N:: + Save N individual samples for each histogram entry to show context in perf + report tui browser. + --raw-trace:: When displaying traceevent output, do not use print fmt or plugins. diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 05c8dd41106c..1921aaa9cece 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1159,6 +1159,8 @@ int cmd_report(int argc, const char **argv) OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, "Enable kernel symbol demangling"), OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"), + OPT_INTEGER(0, "samples", &symbol_conf.res_sample, + "Number of samples to save per histogram entry for individual browsing"), OPT_CALLBACK(0, "percent-limit", &report, "percent", "Don't show entries under that percent", parse_percent_limit), OPT_CALLBACK(0, "percentage", NULL, "relative|absolute", diff --git a/tools/perf/ui/browsers/Build b/tools/perf/ui/browsers/Build index 8fee56b46502..fdf86f7981ca 100644 --- a/tools/perf/ui/browsers/Build +++ b/tools/perf/ui/browsers/Build @@ -3,6 +3,7 @@ perf-y += hists.o perf-y += map.o perf-y += scripts.o perf-y += header.o +perf-y += res_sample.o CFLAGS_annotate.o += -DENABLE_SLFUTURE_CONST CFLAGS_hists.o += -DENABLE_SLFUTURE_CONST diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index fb4430f8982c..3421ecbdd3f0 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -1226,6 +1226,8 @@ void hist_browser__init_hpp(void) hist_browser__hpp_color_overhead_guest_us; perf_hpp__format[PERF_HPP__OVERHEAD_ACC].color = hist_browser__hpp_color_overhead_acc; + + res_sample_init(); } static int hist_browser__show_entry(struct hist_browser *browser, @@ -2345,6 +2347,7 @@ struct popup_action { struct map_symbol ms; int socket; struct perf_evsel *evsel; + enum rstype rstype; int (*fn)(struct hist_browser *browser, struct popup_action *act); }; @@ -2572,6 +2575,17 @@ do_run_script(struct hist_browser *browser __maybe_unused, return 0; } +static int +do_res_sample_script(struct hist_browser *browser __maybe_unused, + struct popup_action *act) +{ + struct hist_entry *he; + + he = hist_browser__selected_entry(browser); + res_sample_browse(he->res_samples, he->num_res, act->evsel, act->rstype); + return 0; +} + static int add_script_opt_2(struct hist_browser *browser __maybe_unused, struct popup_action *act, char **optstr, @@ -2629,6 +2643,27 @@ add_script_opt(struct hist_browser *browser, return n; } +static int +add_res_sample_opt(struct hist_browser *browser __maybe_unused, + struct popup_action *act, char **optstr, + struct res_sample *res_sample, + struct perf_evsel *evsel, + enum rstype type) +{ + if (!res_sample) + return 0; + + if (asprintf(optstr, "Show context for individual samples %s", + type == A_ASM ? "with assembler" : + type == A_SOURCE ? "with source" : "") < 0) + return 0; + + act->fn = do_res_sample_script; + act->evsel = evsel; + act->rstype = type; + return 1; +} + static int do_switch_data(struct hist_browser *browser __maybe_unused, struct popup_action *act __maybe_unused) @@ -3115,6 +3150,18 @@ skip_annotation: } nr_options += add_script_opt(browser, &actions[nr_options], &options[nr_options], NULL, NULL, evsel); + nr_options += add_res_sample_opt(browser, &actions[nr_options], + &options[nr_options], + hist_browser__selected_entry(browser)->res_samples, + evsel, A_NORMAL); + nr_options += add_res_sample_opt(browser, &actions[nr_options], + &options[nr_options], + hist_browser__selected_entry(browser)->res_samples, + evsel, A_ASM); + nr_options += add_res_sample_opt(browser, &actions[nr_options], + &options[nr_options], + hist_browser__selected_entry(browser)->res_samples, + evsel, A_SOURCE); nr_options += add_switch_opt(browser, &actions[nr_options], &options[nr_options]); skip_scripting: diff --git a/tools/perf/ui/browsers/res_sample.c b/tools/perf/ui/browsers/res_sample.c new file mode 100644 index 000000000000..884ef2a92c15 --- /dev/null +++ b/tools/perf/ui/browsers/res_sample.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Display a menu with individual samples to browse with perf script */ +#include "util.h" +#include "hist.h" +#include "evsel.h" +#include "hists.h" +#include "sort.h" +#include "config.h" +#include "time-utils.h" +#include + +static u64 context_len = 10 * NSEC_PER_MSEC; + +static int res_sample_config(const char *var, const char *value, void *data __maybe_unused) +{ + if (!strcmp(var, "samples.context")) + return perf_config_u64(&context_len, var, value); + return 0; +} + +void res_sample_init(void) +{ + perf_config(res_sample_config, NULL); +} + +int res_sample_browse(struct res_sample *res_samples, int num_res, + struct perf_evsel *evsel, enum rstype rstype) +{ + char **names; + int i, n; + int choice; + char *cmd; + char pbuf[256], tidbuf[32], cpubuf[32]; + const char *perf = perf_exe(pbuf, sizeof pbuf); + char trange[128], tsample[64]; + struct res_sample *r; + char extra_format[256]; + + /* For now since ui__popup_menu doesn't like lists that don't fit */ + num_res = max(min(SLtt_Screen_Rows - 4, num_res), 0); + + names = calloc(num_res, sizeof(char *)); + if (!names) + return -1; + for (i = 0; i < num_res; i++) { + char tbuf[64]; + + timestamp__scnprintf_nsec(res_samples[i].time, tbuf, sizeof tbuf); + if (asprintf(&names[i], "%s: CPU %d tid %d", tbuf, + res_samples[i].cpu, res_samples[i].tid) < 0) { + while (--i >= 0) + free(names[i]); + free(names); + return -1; + } + } + choice = ui__popup_menu(num_res, names); + for (i = 0; i < num_res; i++) + free(names[i]); + free(names); + + if (choice < 0 || choice >= num_res) + return -1; + r = &res_samples[choice]; + + n = timestamp__scnprintf_nsec(r->time - context_len, trange, sizeof trange); + trange[n++] = ','; + timestamp__scnprintf_nsec(r->time + context_len, trange + n, sizeof trange - n); + + timestamp__scnprintf_nsec(r->time, tsample, sizeof tsample); + + attr_to_script(extra_format, &evsel->attr); + + if (asprintf(&cmd, "%s script %s%s --time %s %s%s %s%s --ns %s %s %s %s %s | less +/%s", + perf, + input_name ? "-i " : "", + input_name ? input_name : "", + trange, + r->cpu >= 0 ? "--cpu " : "", + r->cpu >= 0 ? (sprintf(cpubuf, "%d", r->cpu), cpubuf) : "", + r->tid ? "--tid " : "", + r->tid ? (sprintf(tidbuf, "%d", r->tid), tidbuf) : "", + extra_format, + rstype == A_ASM ? "-F +insn --xed" : + rstype == A_SOURCE ? "-F +srcline,+srccode" : "", + symbol_conf.inline_name ? "--inline" : "", + "--show-lost-events ", + r->tid ? "--show-switch-events --show-task-events " : "", + tsample) < 0) + return -1; + run_script(cmd); + free(cmd); + return 0; +} diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c index 9e5f87558af6..cdba58447b85 100644 --- a/tools/perf/ui/browsers/scripts.c +++ b/tools/perf/ui/browsers/scripts.c @@ -125,7 +125,7 @@ out: return ret; } -static void run_script(char *cmd) +void run_script(char *cmd) { pr_debug("Running %s\n", cmd); SLang_reset_tty(); diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 34c0f00c68d1..1f230285d78a 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -436,6 +436,13 @@ static int hist_entry__init(struct hist_entry *he, goto err_rawdata; } + if (symbol_conf.res_sample) { + he->res_samples = calloc(sizeof(struct res_sample), + symbol_conf.res_sample); + if (!he->res_samples) + goto err_srcline; + } + INIT_LIST_HEAD(&he->pairs.node); thread__get(he->thread); he->hroot_in = RB_ROOT_CACHED; @@ -446,6 +453,9 @@ static int hist_entry__init(struct hist_entry *he, return 0; +err_srcline: + free(he->srcline); + err_rawdata: free(he->raw_data); @@ -603,6 +613,32 @@ out: return he; } +static unsigned random_max(unsigned high) +{ + unsigned thresh = -high % high; + for (;;) { + unsigned r = random(); + if (r >= thresh) + return r % high; + } +} + +static void hists__res_sample(struct hist_entry *he, struct perf_sample *sample) +{ + struct res_sample *r; + int j; + + if (he->num_res < symbol_conf.res_sample) { + j = he->num_res++; + } else { + j = random_max(symbol_conf.res_sample); + } + r = &he->res_samples[j]; + r->time = sample->time; + r->cpu = sample->cpu; + r->tid = sample->tid; +} + static struct hist_entry* __hists__add_entry(struct hists *hists, struct addr_location *al, @@ -650,6 +686,8 @@ __hists__add_entry(struct hists *hists, if (!hists->has_callchains && he && he->callchain_size != 0) hists->has_callchains = true; + if (he && symbol_conf.res_sample) + hists__res_sample(he, sample); return he; } @@ -1173,6 +1211,7 @@ void hist_entry__delete(struct hist_entry *he) mem_info__zput(he->mem_info); } + zfree(&he->res_samples); zfree(&he->stat_acc); free_srcline(he->srcline); if (he->srcfile && he->srcfile[0]) diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 2113a6639cea..76ff6c6d03b8 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -433,6 +433,13 @@ struct hist_browser_timer { }; struct annotation_options; +struct res_sample; + +enum rstype { + A_NORMAL, + A_ASM, + A_SOURCE +}; #ifdef HAVE_SLANG_SUPPORT #include "../ui/keysyms.h" @@ -454,6 +461,11 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist, const char *help, struct annotation_options *annotation_options); int script_browse(const char *script_opt, struct perf_evsel *evsel); + +void run_script(char *cmd); +int res_sample_browse(struct res_sample *res_samples, int num_res, + struct perf_evsel *evsel, enum rstype rstype); +void res_sample_init(void); #else static inline int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused, @@ -488,6 +500,16 @@ static inline int script_browse(const char *script_opt __maybe_unused, return 0; } +static inline int res_sample_browse(struct res_sample *res_samples __maybe_unused, + int num_res __maybe_unused, + struct perf_evsel *evsel __maybe_unused, + enum rstype rstype __maybe_unused) +{ + return 0; +} + +static inline void res_sample_init(void) {} + #define K_LEFT -1000 #define K_RIGHT -2000 #define K_SWITCH_INPUT_DATA -3000 diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 19dceb7f6145..bb9442ab7a0c 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -47,6 +47,12 @@ extern struct sort_entry sort_srcline; extern enum sort_type sort__first_dimension; extern const char default_mem_sort_order[]; +struct res_sample { + u64 time; + int cpu; + int tid; +}; + struct he_stat { u64 period; u64 period_sys; @@ -140,6 +146,8 @@ struct hist_entry { struct mem_info *mem_info; void *raw_data; u32 raw_size; + int num_res; + struct res_sample *res_samples; void *trace_output; struct perf_hpp_list *hpp_list; struct hist_entry *parent_he; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 6b73a0eeb6a1..58442ca5e3c4 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -51,6 +51,7 @@ struct symbol_conf symbol_conf = { .symfs = "", .event_group = true, .inline_name = true, + .res_sample = 0, }; static enum dso_binary_type binary_type_symtab[] = { diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h index a5684a71b78e..6c55fa6fccec 100644 --- a/tools/perf/util/symbol_conf.h +++ b/tools/perf/util/symbol_conf.h @@ -68,6 +68,7 @@ struct symbol_conf { struct intlist *pid_list, *tid_list; const char *symfs; + int res_sample; }; extern struct symbol_conf symbol_conf; -- cgit From ca52babe033f2a0a535ce7c814e54a44cead1f15 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 11 Mar 2019 07:44:59 -0700 Subject: perf tools: Add some new tips describing the new options Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20190311144502.15423-9-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/tips.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt index 849599f39c5e..869965d629ce 100644 --- a/tools/perf/Documentation/tips.txt +++ b/tools/perf/Documentation/tips.txt @@ -15,6 +15,7 @@ To see callchains in a more compact form: perf report -g folded Show individual samples with: perf script Limit to show entries above 5% only: perf report --percent-limit 5 Profiling branch (mis)predictions with: perf record -b / perf report +To show assembler sample contexts use perf record -b / perf script -F +brstackinsn --xed Treat branches as callchains: perf report --branch-history To count events in every 1000 msec: perf stat -I 1000 Print event counts in CSV format with: perf stat -x, @@ -34,3 +35,9 @@ Show current config key-value pairs: perf config --list Show user configuration overrides: perf config --user --list To add Node.js USDT(User-Level Statically Defined Tracing): perf buildid-cache --add `which node` To report cacheline events from previous recording: perf c2c report +To browse sample contexts use perf report --sample 10 and select in context menu +To separate samples by time use perf report --sort time,overhead,sym +To set sample time separation other than 100ms with --sort time use --time-quantum +Add -I to perf report to sample register values visible in perf report context. +To show IPC for sampling periods use perf record -e '{cycles,instructions}:S' and then browse context +To show context switches in perf report sample context add --switch-events to perf record. -- cgit From 905e4aff31382c3f9b2014d1361f4a1be4479ba2 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 11 Mar 2019 07:45:01 -0700 Subject: perf script: Add array bound checking to list_scripts Don't overflow array when the scripts directory is too large, or the script file name is too long. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20190311144502.15423-11-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 8 ++++++-- tools/perf/builtin.h | 3 ++- tools/perf/ui/browsers/scripts.c | 3 ++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index b695b20ffc8a..2f93d60c5a17 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2982,7 +2982,8 @@ static int check_ev_match(char *dir_name, char *scriptname, * will list all statically runnable scripts, select one, execute it and * show the output in a perf browser. */ -int find_scripts(char **scripts_array, char **scripts_path_array) +int find_scripts(char **scripts_array, char **scripts_path_array, int num, + int pathlen) { struct dirent *script_dirent, *lang_dirent; char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN]; @@ -3027,7 +3028,10 @@ int find_scripts(char **scripts_array, char **scripts_path_array) /* Skip those real time scripts: xxxtop.p[yl] */ if (strstr(script_dirent->d_name, "top.")) continue; - sprintf(scripts_path_array[i], "%s/%s", lang_path, + if (i >= num) + break; + snprintf(scripts_path_array[i], pathlen, "%s/%s", + lang_path, script_dirent->d_name); temp = strchr(script_dirent->d_name, '.'); snprintf(scripts_array[i], diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 05745f3ce912..999fe9170122 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -40,5 +40,6 @@ int cmd_mem(int argc, const char **argv); int cmd_data(int argc, const char **argv); int cmd_ftrace(int argc, const char **argv); -int find_scripts(char **scripts_array, char **scripts_path_array); +int find_scripts(char **scripts_array, char **scripts_path_array, int num, + int pathlen); #endif diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c index cdba58447b85..96e5cd3b0eee 100644 --- a/tools/perf/ui/browsers/scripts.c +++ b/tools/perf/ui/browsers/scripts.c @@ -97,7 +97,8 @@ static int list_scripts(char *script_name, bool *custom, paths[i] = names[i] + SCRIPT_NAMELEN; } - num = find_scripts(names + max_std, paths + max_std); + num = find_scripts(names + max_std, paths + max_std, SCRIPT_MAX_NO - max_std, + SCRIPT_FULLPATH_LEN); if (num < 0) num = 0; choice = ui__popup_menu(num + max_std, (char * const *)names); -- cgit From 59c24980dffbea2106fe65e64ea77834d657ee9c Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 11 Mar 2019 07:45:02 -0700 Subject: perf ui browser: Fix ui popup argv browser for many entries Fix the argv ui browser code to correctly display more entries than fit on the screen without crashing. The problem was some type confusion with pointer types in the ->seek function. Do the argv arithmetic correctly with char ** pointers. Also add some asserts to find overruns and limit the display function correctly. Then finally remove a workaround for this in the res sample browser. Committer testing: 1) Resize the x terminal to have just some 5 lines 2) Use 'perf report --samples 1' to activate the sample browser options in the menu 3) Press ENTER, this will cause the crash: # perf report --samples 1 perf: Segmentation fault -------- backtrace -------- perf[0x5a514a] /lib64/libc.so.6(+0x385bf)[0x7f27281b55bf] /lib64/libc.so.6(+0x161a67)[0x7f27282dea67] /lib64/libslang.so.2(SLsmg_write_wrapped_string+0x82)[0x7f272874a0b2] perf(ui_browser__argv_refresh+0x77)[0x5939a7] perf[0x5924cc] perf(ui_browser__run+0x39)[0x593449] perf(ui__popup_menu+0x83)[0x5a5263] perf[0x59f421] perf(perf_evlist__tui_browse_hists+0x3a0)[0x5a3780] perf(cmd_report+0x2746)[0x447136] perf[0x4a95fe] perf(main+0x61c)[0x42dc6c] /lib64/libc.so.6(__libc_start_main+0xf2)[0x7f27281a1412] perf(_start+0x2d)[0x42de9d] # After applying this patch no crash takes place in such situation. Signed-off-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20190311144502.15423-12-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browser.c | 10 +++++++--- tools/perf/ui/browsers/res_sample.c | 3 --- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 4f75561424ed..4ad37d8c7d6a 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -611,14 +611,16 @@ void ui_browser__argv_seek(struct ui_browser *browser, off_t offset, int whence) browser->top = browser->entries; break; case SEEK_CUR: - browser->top = browser->top + browser->top_idx + offset; + browser->top = (char **)browser->top + offset; break; case SEEK_END: - browser->top = browser->top + browser->nr_entries - 1 + offset; + browser->top = (char **)browser->entries + browser->nr_entries - 1 + offset; break; default: return; } + assert((char **)browser->top < (char **)browser->entries + browser->nr_entries); + assert((char **)browser->top >= (char **)browser->entries); } unsigned int ui_browser__argv_refresh(struct ui_browser *browser) @@ -630,7 +632,9 @@ unsigned int ui_browser__argv_refresh(struct ui_browser *browser) browser->top = browser->entries; pos = (char **)browser->top; - while (idx < browser->nr_entries) { + while (idx < browser->nr_entries && + row < (unsigned)SLtt_Screen_Rows - 1) { + assert(pos < (char **)browser->entries + browser->nr_entries); if (!browser->filter || !browser->filter(browser, *pos)) { ui_browser__gotorc(browser, row, 0); browser->write(browser, pos, row); diff --git a/tools/perf/ui/browsers/res_sample.c b/tools/perf/ui/browsers/res_sample.c index 884ef2a92c15..c0dd73176d42 100644 --- a/tools/perf/ui/browsers/res_sample.c +++ b/tools/perf/ui/browsers/res_sample.c @@ -36,9 +36,6 @@ int res_sample_browse(struct res_sample *res_samples, int num_res, struct res_sample *r; char extra_format[256]; - /* For now since ui__popup_menu doesn't like lists that don't fit */ - num_res = max(min(SLtt_Screen_Rows - 4, num_res), 0); - names = calloc(num_res, sizeof(char *)); if (!names) return -1; -- cgit From e3b74de50a5f8bbfacbd772874c8b5d9220ebcdb Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 11 Mar 2019 07:45:00 -0700 Subject: perf tools report: Add custom scripts to script menu Add a way to define custom scripts through ~/.perfconfig, which are then added to the scripts menu. The scripts get the same arguments as 'perf script', in particular -i, --cpu, --tid. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20190311144502.15423-10-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 8 ++++++++ tools/perf/ui/browsers/scripts.c | 20 ++++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 2d0fb7613134..95054a8176a2 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -590,6 +590,14 @@ samples.*:: Define how many ns worth of time to show around samples in perf report sample context browser. +scripts.*:: + + Any option defines a script that is added to the scripts menu + in the interactive perf browser and whose output is displayed. + The name of the option is the name, the value is a script command line. + The script gets the same options passed as a full perf script, + in particular -i perfdata file, --cpu, --tid + SEE ALSO -------- linkperf:perf[1] diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c index 96e5cd3b0eee..27cf3ab88d13 100644 --- a/tools/perf/ui/browsers/scripts.c +++ b/tools/perf/ui/browsers/scripts.c @@ -6,6 +6,7 @@ #include "../../util/symbol.h" #include "../browser.h" #include "../libslang.h" +#include "config.h" #define SCRIPT_NAMELEN 128 #define SCRIPT_MAX_NO 64 @@ -53,6 +54,24 @@ static int add_script_option(const char *name, const char *opt, return 0; } +static int scripts_config(const char *var, const char *value, void *data) +{ + struct script_config *c = data; + + if (!strstarts(var, "scripts.")) + return -1; + if (c->index >= SCRIPT_MAX_NO) + return -1; + c->names[c->index] = strdup(var + 7); + if (!c->names[c->index]) + return -1; + if (asprintf(&c->paths[c->index], "%s %s", value, + c->extra_format) < 0) + return -1; + c->index++; + return 0; +} + /* * When success, will copy the full path of the selected script * into the buffer pointed by script_name, and return 0. @@ -87,6 +106,7 @@ static int list_scripts(char *script_name, bool *custom, &scriptc); add_script_option("Show individual samples with source", "-F +srcline,+srccode", &scriptc); + perf_config(scripts_config, &scriptc); custom_perf = scriptc.index; add_script_option("Show samples with custom perf script arguments", "", &scriptc); i = scriptc.index; -- cgit From dfcbc2f2994b8a3af3605a26dc29c07ad7378bf4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 11 Mar 2019 17:07:52 -0300 Subject: tools lib bpf: Fix the build by adding a missing stdarg.h include The libbpf_print_fn_t typedef uses va_list without including the header where that type is defined, stdarg.h, breaking in places where we're unlucky for that type not to be already defined by some previously included header. Noticed while building on fedora 24 cross building tools/perf to the ARC architecture using the uClibc C library: 28 fedora:24-x-ARC-uClibc : FAIL arc-linux-gcc (ARCompact ISA Linux uClibc toolchain 2017.09-rc2) 7.1.1 20170710 CC /tmp/build/perf/tests/llvm.o In file included from tests/llvm.c:3:0: /git/linux/tools/lib/bpf/libbpf.h:57:20: error: unknown type name 'va_list' const char *, va_list ap); ^~~~~~~ /git/linux/tools/lib/bpf/libbpf.h:59:34: error: unknown type name 'libbpf_print_fn_t' LIBBPF_API void libbpf_set_print(libbpf_print_fn_t fn); ^~~~~~~~~~~~~~~~~ mv: cannot stat '/tmp/build/perf/tests/.llvm.o.tmp': No such file or directory Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jakub Kicinski Cc: Jiri Olsa Cc: Namhyung Kim Cc: Quentin Monnet Cc: Stanislav Fomichev Cc: Yonghong Song Fixes: a8a1f7d09cfc ("libbpf: fix libbpf_print") Link: https://lkml.kernel.org/n/tip-5270n2quu2gqz22o7itfdx00@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/bpf/libbpf.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index b4652aa1a58a..aa1521a51687 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -10,6 +10,7 @@ #ifndef __LIBBPF_LIBBPF_H #define __LIBBPF_LIBBPF_H +#include #include #include #include -- cgit From cdb8faa00e3fcdd0ad10add743516d616dc7d38e Mon Sep 17 00:00:00 2001 From: Petr Štetiar Date: Mon, 11 Mar 2019 22:08:22 +0100 Subject: mips: bcm47xx: Enable USB power on Netgear WNDR3400v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Eric has reported on OpenWrt's bug tracking system[1], that he's not able to use USB devices on his WNDR3400v2 device after the boot, until he turns on GPIO #21 manually through sysfs. 1. https://bugs.openwrt.org/index.php?do=details&task_id=2170 Cc: Rafał Miłecki Cc: Hauke Mehrtens Reported-by: Eric Bohlman Tested-by: Eric Bohlman Signed-off-by: Petr Štetiar Signed-off-by: Paul Burton --- arch/mips/bcm47xx/workarounds.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/mips/bcm47xx/workarounds.c b/arch/mips/bcm47xx/workarounds.c index 46eddbec8d9f..0ab95dd431b3 100644 --- a/arch/mips/bcm47xx/workarounds.c +++ b/arch/mips/bcm47xx/workarounds.c @@ -24,6 +24,7 @@ void __init bcm47xx_workarounds(void) case BCM47XX_BOARD_NETGEAR_WNR3500L: bcm47xx_workarounds_enable_usb_power(12); break; + case BCM47XX_BOARD_NETGEAR_WNDR3400V2: case BCM47XX_BOARD_NETGEAR_WNDR3400_V3: bcm47xx_workarounds_enable_usb_power(21); break; -- cgit From 9a18b5a412baf23137c8fddb4ea7f0c14087f31c Mon Sep 17 00:00:00 2001 From: "Enrico Weigelt, metux IT consult" Date: Mon, 11 Mar 2019 14:57:59 +0100 Subject: arch: arc: Kconfig: pedantic formatting Formatting of Kconfig files doesn't look so pretty, so let the Great White Handkerchief come around and clean it up. Signed-off-by: Enrico Weigelt, metux IT consult Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 12 ++++++------ arch/arc/plat-eznps/Kconfig | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 95fb11a85566..68401536e718 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -144,11 +144,11 @@ config ARC_CPU_770 Support for ARC770 core introduced with Rel 4.10 (Summer 2011) This core has a bunch of cool new features: -MMU-v3: Variable Page Sz (4k, 8k, 16k), bigger J-TLB (128x4) - Shared Address Spaces (for sharing TLB entries in MMU) + Shared Address Spaces (for sharing TLB entries in MMU) -Caches: New Prog Model, Region Flush -Insns: endian swap, load-locked/store-conditional, time-stamp-ctr -endif #ISA_ARCOMPACT +endif #ISA_ARCOMPACT config ARC_CPU_HS bool "ARC-HS" @@ -198,7 +198,7 @@ config ARC_SMP_HALT_ON_RESET at designated entry point. For other case, all jump to common entry point and spin wait for Master's signal. -endif #SMP +endif #SMP config ARC_MCIP bool "ARConnect Multicore IP (MCIP) Support " @@ -249,7 +249,7 @@ config ARC_CACHE_VIPT_ALIASING bool "Support VIPT Aliasing D$" depends on ARC_HAS_DCACHE && ISA_ARCOMPACT -endif #ARC_CACHE +endif #ARC_CACHE config ARC_HAS_ICCM bool "Use ICCM" @@ -370,7 +370,7 @@ config ARC_FPU_SAVE_RESTORE based on actual usage of FPU by a task. Thus our implemn does this for all tasks in system. -endif #ISA_ARCOMPACT +endif #ISA_ARCOMPACT config ARC_CANT_LLSC def_bool n @@ -423,7 +423,7 @@ config ARC_IRQ_NO_AUTOSAVE This is programmable and can be optionally disabled in which case software INTERRUPT_PROLOGUE/EPILGUE do the needed work -endif # ISA_ARCV2 +endif # ISA_ARCV2 endmenu # "ARC CPU Configuration" diff --git a/arch/arc/plat-eznps/Kconfig b/arch/arc/plat-eznps/Kconfig index 8eff057efcae..2eaecfb063a7 100644 --- a/arch/arc/plat-eznps/Kconfig +++ b/arch/arc/plat-eznps/Kconfig @@ -26,8 +26,8 @@ config EZNPS_MTM_EXT help Here we add new hierarchy for CPUs topology. We got: - Core - Thread + Core + Thread At the new thread level each CPU represent one HW thread. At highest hierarchy each core contain 16 threads, any of them seem like CPU from Linux point of view. @@ -35,10 +35,10 @@ config EZNPS_MTM_EXT core and HW scheduler round robin between them. config EZNPS_MEM_ERROR_ALIGN - bool "ARC-EZchip Memory error as an exception" - depends on EZNPS_MTM_EXT - default n - help + bool "ARC-EZchip Memory error as an exception" + depends on EZNPS_MTM_EXT + default n + help On the real chip of the NPS, user memory errors are handled as a machine check exception, which is fatal, whereas on simulator platform for NPS, is handled as a Level 2 interrupt -- cgit From 7a9b6be9fe58194d9a349159176e8cc0d8f10ef8 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 8 Mar 2019 13:02:16 -0800 Subject: arm64: bcm2835: Add missing dependency on MFD_CORE. When adding the MFD dependency for power domains and WDT in bcm2835, I added it only on the arm32 side and missed it for arm64. Fixes: 5e6acc3e678e ("bcm2835-pm: Move bcm2835-watchdog's DT probe to an MFD.") Signed-off-by: Eric Anholt Reported-by: Stefan Wahren Acked-by: Stefan Wahren --- arch/arm64/Kconfig.platforms | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index 251ecf34cb02..50f9fb562059 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -27,6 +27,7 @@ config ARCH_BCM2835 bool "Broadcom BCM2835 family" select TIMER_OF select GPIOLIB + select MFD_CORE select PINCTRL select PINCTRL_BCM2835 select ARM_AMBA -- cgit From d6f1837107c00fa7b2fdb606acf65b33b455dbfd Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 11 Mar 2019 22:21:09 -0700 Subject: selftests/bpf: fix segfault of test_progs when prog loading failed The test_progs subtests, test_spin_lock() and test_map_lock(), requires BTF present to run successfully. Currently, when BTF failed to load, test_progs will segfault, $ ./test_progs ... 12: (bf) r1 = r8 13: (85) call bpf_spin_lock#93 map 'hash_map' has to have BTF in order to use bpf_spin_lock libbpf: -- END LOG -- libbpf: failed to load program 'map_lock_demo' libbpf: failed to load object './test_map_lock.o' test_map_lock:bpf_prog_load errno 13 Segmentation fault The segfault is caused by uninitialized variable "obj", which is used in bpf_object__close(obj), when bpf prog failed to load. Initializing variable "obj" to NULL in two occasions fixed the problem. $ ./test_progs ... Summary: 219 PASSED, 2 FAILED Fixes: b4d4556c3266 ("selftests/bpf: add bpf_spin_lock verifier tests") Fixes: ba72a7b4badb ("selftests/bpf: test for BPF_F_LOCK") Reported-by: Daniel Borkmann Signed-off-by: Yonghong Song Acked-by: Song Liu Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/prog_tests/map_lock.c | 2 +- tools/testing/selftests/bpf/prog_tests/spinlock.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c index 90f8a206340a..ee99368c595c 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c @@ -37,7 +37,7 @@ void test_map_lock(void) const char *file = "./test_map_lock.o"; int prog_fd, map_fd[2], vars[17] = {}; pthread_t thread_id[6]; - struct bpf_object *obj; + struct bpf_object *obj = NULL; int err = 0, key = 0, i; void *ret; diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c index 9a573a9675d7..114ebe6a438e 100644 --- a/tools/testing/selftests/bpf/prog_tests/spinlock.c +++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c @@ -5,7 +5,7 @@ void test_spinlock(void) { const char *file = "./test_spin_lock.o"; pthread_t thread_id[4]; - struct bpf_object *obj; + struct bpf_object *obj = NULL; int prog_fd; int err = 0, i; void *ret; -- cgit From 6bf21b54a596d60905cfc7e8af8e2fe16d9fe7e9 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 12 Mar 2019 09:59:45 +0100 Subject: libbpf: fix to reject unknown flags in xsk_socket__create() In xsk_socket__create(), the libbpf_flags field was not checked for setting currently unused/unknown flags. This patch fixes that by returning -EINVAL if the user has set any flag that is not in use at this point in time. Fixes: 1cad07884239 ("libbpf: add support for using AF_XDP sockets") Signed-off-by: Magnus Karlsson Reviewed-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- tools/lib/bpf/xsk.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index f98ac82c9aea..8d0078b65486 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -126,8 +126,8 @@ static void xsk_set_umem_config(struct xsk_umem_config *cfg, cfg->frame_headroom = usr_cfg->frame_headroom; } -static void xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, - const struct xsk_socket_config *usr_cfg) +static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, + const struct xsk_socket_config *usr_cfg) { if (!usr_cfg) { cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; @@ -135,14 +135,19 @@ static void xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, cfg->libbpf_flags = 0; cfg->xdp_flags = 0; cfg->bind_flags = 0; - return; + return 0; } + if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD) + return -EINVAL; + cfg->rx_size = usr_cfg->rx_size; cfg->tx_size = usr_cfg->tx_size; cfg->libbpf_flags = usr_cfg->libbpf_flags; cfg->xdp_flags = usr_cfg->xdp_flags; cfg->bind_flags = usr_cfg->bind_flags; + + return 0; } int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, @@ -557,7 +562,9 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, } strncpy(xsk->ifname, ifname, IFNAMSIZ); - xsk_set_xdp_socket_config(&xsk->config, usr_config); + err = xsk_set_xdp_socket_config(&xsk->config, usr_config); + if (err) + goto out_socket; if (rx) { err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, -- cgit From 2795e8c251614ac0784c9d41008551109f665716 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Mon, 11 Mar 2019 02:25:17 -0500 Subject: net: ieee802154: fix a potential NULL pointer dereference In case alloc_ordered_workqueue fails, the fix releases sources and returns -ENOMEM to avoid NULL pointer dereference. Signed-off-by: Kangjie Lu Acked-by: Michael Hennerich Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/adf7242.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ieee802154/adf7242.c b/drivers/net/ieee802154/adf7242.c index cd1d8faccca5..cd6b95e673a5 100644 --- a/drivers/net/ieee802154/adf7242.c +++ b/drivers/net/ieee802154/adf7242.c @@ -1268,6 +1268,10 @@ static int adf7242_probe(struct spi_device *spi) INIT_DELAYED_WORK(&lp->work, adf7242_rx_cal_work); lp->wqueue = alloc_ordered_workqueue(dev_name(&spi->dev), WQ_MEM_RECLAIM); + if (unlikely(!lp->wqueue)) { + ret = -ENOMEM; + goto err_hw_init; + } ret = adf7242_hw_init(lp); if (ret) -- cgit From 19b39a25388e71390e059906c979f87be4ef0c71 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 19 Feb 2019 13:10:29 +0800 Subject: ieee802154: hwsim: propagate genlmsg_reply return code genlmsg_reply can fail, so propagate its return code Signed-off-by: Li RongQing Signed-off-by: Stefan Schmidt --- drivers/net/ieee802154/mac802154_hwsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ieee802154/mac802154_hwsim.c b/drivers/net/ieee802154/mac802154_hwsim.c index b6743f03dce0..3b88846de31b 100644 --- a/drivers/net/ieee802154/mac802154_hwsim.c +++ b/drivers/net/ieee802154/mac802154_hwsim.c @@ -324,7 +324,7 @@ static int hwsim_get_radio_nl(struct sk_buff *msg, struct genl_info *info) goto out_err; } - genlmsg_reply(skb, info); + res = genlmsg_reply(skb, info); break; } -- cgit From bf504110bc8aa05df48b0e5f0aa84bfb81e0574b Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 4 Mar 2019 14:06:12 +0000 Subject: Btrfs: fix incorrect file size after shrinking truncate and fsync If we do a shrinking truncate against an inode which is already present in the respective log tree and then rename it, as part of logging the new name we end up logging an inode item that reflects the old size of the file (the one which we previously logged) and not the new smaller size. The decision to preserve the size previously logged was added by commit 1a4bcf470c886b ("Btrfs: fix fsync data loss after adding hard link to inode") in order to avoid data loss after replaying the log. However that decision is only needed for the case the logged inode size is smaller then the current size of the inode, as explained in that commit's change log. If the current size of the inode is smaller then the previously logged size, we know a shrinking truncate happened and therefore need to use that smaller size. Example to trigger the problem: $ mkfs.btrfs -f /dev/sdb $ mount /dev/sdb /mnt $ xfs_io -f -c "pwrite -S 0xab 0 8000" /mnt/foo $ xfs_io -c "fsync" /mnt/foo $ xfs_io -c "truncate 3000" /mnt/foo $ mv /mnt/foo /mnt/bar $ xfs_io -c "fsync" /mnt/bar $ mount /dev/sdb /mnt $ od -t x1 -A d /mnt/bar 0000000 ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab ab * 0008000 Once we rename the file, we log its name (and inode item), and because the inode was already logged before in the current transaction, we log it with a size of 8000 bytes because that is the size we previously logged (with the first fsync). As part of the rename, besides logging the inode, we do also sync the log, which is done since commit d4682ba03ef618 ("Btrfs: sync log after logging new name"), so the next fsync against our inode is effectively a no-op, since no new changes happened since the rename operation. Even if did not sync the log during the rename operation, the same problem (fize size of 8000 bytes instead of 3000 bytes) would be visible after replaying the log if the log ended up getting synced to disk through some other means, such as for example by fsyncing some other modified file. In the example above the fsync after the rename operation is there just because not every filesystem may guarantee logging/journalling the inode (and syncing the log/journal) during the rename operation, for example it is needed for f2fs, but not for ext4 and xfs. Fix this scenario by, when logging a new name (which is triggered by rename and link operations), using the current size of the inode instead of the previously logged inode size. A test case for fstests follows soon. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202695 CC: stable@vger.kernel.org # 4.4+ Reported-by: Seulbae Kim Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f06454a55e00..5256cddf3a43 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4544,6 +4544,19 @@ static int logged_inode_size(struct btrfs_root *log, struct btrfs_inode *inode, item = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_inode_item); *size_ret = btrfs_inode_size(path->nodes[0], item); + /* + * If the in-memory inode's i_size is smaller then the inode + * size stored in the btree, return the inode's i_size, so + * that we get a correct inode size after replaying the log + * when before a power failure we had a shrinking truncate + * followed by addition of a new name (rename / new hard link). + * Otherwise return the inode size from the btree, to avoid + * data loss when replaying a log due to previously doing a + * write that expands the inode's size and logging a new name + * immediately after. + */ + if (*size_ret > inode->vfs_inode.i_size) + *size_ret = inode->vfs_inode.i_size; } btrfs_release_path(path); -- cgit From 2cc8334270e281815c3850c3adea363c51f21e0d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 6 Mar 2019 17:13:04 -0500 Subject: btrfs: remove WARN_ON in log_dir_items When Filipe added the recursive directory logging stuff in 2f2ff0ee5e430 ("Btrfs: fix metadata inconsistencies after directory fsync") he specifically didn't take the directory i_mutex for the children directories that we need to log because of lockdep. This is generally fine, but can lead to this WARN_ON() tripping if we happen to run delayed deletion's in between our first search and our second search of dir_item/dir_indexes for this directory. We expect this to happen, so the WARN_ON() isn't necessary. Drop the WARN_ON() and add a comment so we know why this case can happen. CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Filipe Manana Signed-off-by: Josef Bacik Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 5256cddf3a43..960ea49a7a0f 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3578,9 +3578,16 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, } btrfs_release_path(path); - /* find the first key from this transaction again */ + /* + * Find the first key from this transaction again. See the note for + * log_new_dir_dentries, if we're logging a directory recursively we + * won't be holding its i_mutex, which means we can modify the directory + * while we're logging it. If we remove an entry between our first + * search and this search we'll not find the key again and can just + * bail. + */ ret = btrfs_search_slot(NULL, root, &min_key, path, 0, 0); - if (WARN_ON(ret != 0)) + if (ret != 0) goto done; /* -- cgit From 609e804d771f59dc5d45a93e5ee0053c74bbe2bf Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 27 Feb 2019 13:42:30 +0000 Subject: Btrfs: fix file corruption after snapshotting due to mix of buffered/DIO writes When we are mixing buffered writes with direct IO writes against the same file and snapshotting is happening concurrently, we can end up with a corrupt file content in the snapshot. Example: 1) Inode/file is empty. 2) Snapshotting starts. 2) Buffered write at offset 0 length 256Kb. This updates the i_size of the inode to 256Kb, disk_i_size remains zero. This happens after the task doing the snapshot flushes all existing delalloc. 3) DIO write at offset 256Kb length 768Kb. Once the ordered extent completes it sets the inode's disk_i_size to 1Mb (256Kb + 768Kb) and updates the inode item in the fs tree with a size of 1Mb (which is the value of disk_i_size). 4) The dealloc for the range [0, 256Kb[ did not start yet. 5) The transaction used in the DIO ordered extent completion, which updated the inode item, is committed by the snapshotting task. 6) Snapshot creation completes. 7) Dealloc for the range [0, 256Kb[ is flushed. After that when reading the file from the snapshot we always get zeroes for the range [0, 256Kb[, the file has a size of 1Mb and the data written by the direct IO write is found. From an application's point of view this is a corruption, since in the source subvolume it could never read a version of the file that included the data from the direct IO write without the data from the buffered write included as well. In the snapshot's tree, file extent items are missing for the range [0, 256Kb[. The issue, obviously, does not happen when using the -o flushoncommit mount option. Fix this by flushing delalloc for all the roots that are about to be snapshotted when committing a transaction. This guarantees total ordering when updating the disk_i_size of an inode since the flush for dealloc is done when a transaction is in the TRANS_STATE_COMMIT_START state and wait is done once no more external writers exist. This is similar to what we do when using the flushoncommit mount option, but we do it only if the transaction has snapshots to create and only for the roots of the subvolumes to be snapshotted. The bulk of the dealloc is flushed in the snapshot creation ioctl, so the flush work we do inside the transaction is minimized. This issue, involving buffered and direct IO writes with snapshotting, is often triggered by fstest btrfs/078, and got reported by fsck when not using the NO_HOLES features, for example: $ cat results/btrfs/078.full (...) _check_btrfs_filesystem: filesystem on /dev/sdc is inconsistent *** fsck.btrfs output *** [1/7] checking root items [2/7] checking extents [3/7] checking free space cache [4/7] checking fs roots root 258 inode 264 errors 100, file extent discount Found file extent holes: start: 524288, len: 65536 ERROR: errors found in fs roots Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/transaction.c | 49 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index acdad6d658f5..e4e665f422fc 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1886,8 +1886,10 @@ static void btrfs_cleanup_pending_block_groups(struct btrfs_trans_handle *trans) } } -static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) +static inline int btrfs_start_delalloc_flush(struct btrfs_trans_handle *trans) { + struct btrfs_fs_info *fs_info = trans->fs_info; + /* * We use writeback_inodes_sb here because if we used * btrfs_start_delalloc_roots we would deadlock with fs freeze. @@ -1897,15 +1899,50 @@ static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) * from already being in a transaction and our join_transaction doesn't * have to re-take the fs freeze lock. */ - if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) + if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) { writeback_inodes_sb(fs_info->sb, WB_REASON_SYNC); + } else { + struct btrfs_pending_snapshot *pending; + struct list_head *head = &trans->transaction->pending_snapshots; + + /* + * Flush dellaloc for any root that is going to be snapshotted. + * This is done to avoid a corrupted version of files, in the + * snapshots, that had both buffered and direct IO writes (even + * if they were done sequentially) due to an unordered update of + * the inode's size on disk. + */ + list_for_each_entry(pending, head, list) { + int ret; + + ret = btrfs_start_delalloc_snapshot(pending->root); + if (ret) + return ret; + } + } return 0; } -static inline void btrfs_wait_delalloc_flush(struct btrfs_fs_info *fs_info) +static inline void btrfs_wait_delalloc_flush(struct btrfs_trans_handle *trans) { - if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) + struct btrfs_fs_info *fs_info = trans->fs_info; + + if (btrfs_test_opt(fs_info, FLUSHONCOMMIT)) { btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); + } else { + struct btrfs_pending_snapshot *pending; + struct list_head *head = &trans->transaction->pending_snapshots; + + /* + * Wait for any dellaloc that we started previously for the roots + * that are going to be snapshotted. This is to avoid a corrupted + * version of files in the snapshots that had both buffered and + * direct IO writes (even if they were done sequentially). + */ + list_for_each_entry(pending, head, list) + btrfs_wait_ordered_extents(pending->root, + U64_MAX, 0, U64_MAX); + } } int btrfs_commit_transaction(struct btrfs_trans_handle *trans) @@ -2023,7 +2060,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) extwriter_counter_dec(cur_trans, trans->type); - ret = btrfs_start_delalloc_flush(fs_info); + ret = btrfs_start_delalloc_flush(trans); if (ret) goto cleanup_transaction; @@ -2039,7 +2076,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans) if (ret) goto cleanup_transaction; - btrfs_wait_delalloc_flush(fs_info); + btrfs_wait_delalloc_flush(trans); btrfs_scrub_pause(fs_info); /* -- cgit From 0cc068e6ee59c1fffbfa977d8bf868b7551d80ac Mon Sep 17 00:00:00 2001 From: David Sterba Date: Thu, 7 Mar 2019 15:40:50 +0100 Subject: btrfs: don't report readahead errors and don't update statistics As readahead is an optimization, all errors are usually filtered out, but still properly handled when the real read call is done. The commit 5e9d398240b2 ("btrfs: readpages() should submit IO as read-ahead") added REQ_RAHEAD to readpages() because that's only used for readahead (despite what one would expect from the callback name). This causes a flood of messages and inflated read error stats, so skip reporting in case it's readahead. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202403 Reported-by: LimeTech Fixes: 5e9d398240b2 ("btrfs: readpages() should submit IO as read-ahead") CC: stable@vger.kernel.org # 4.19+ Signed-off-by: David Sterba --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 9024eee889b9..db934ceae9c1 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -6407,7 +6407,7 @@ static void btrfs_end_bio(struct bio *bio) if (bio_op(bio) == REQ_OP_WRITE) btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); - else + else if (!(bio->bi_opf & REQ_RAHEAD)) btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_READ_ERRS); if (bio->bi_opf & REQ_PREFLUSH) -- cgit From 1b986589680a2a5b6fc1ac196ea69925a93d9dd9 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 12 Mar 2019 10:23:02 -0700 Subject: bpf: Fix bpf_tcp_sock and bpf_sk_fullsock issue related to bpf_sk_release Lorenz Bauer [thanks!] reported that a ptr returned by bpf_tcp_sock(sk) can still be accessed after bpf_sk_release(sk). Both bpf_tcp_sock() and bpf_sk_fullsock() have the same issue. This patch addresses them together. A simple reproducer looks like this: sk = bpf_sk_lookup_tcp(); /* if (!sk) ... */ tp = bpf_tcp_sock(sk); /* if (!tp) ... */ bpf_sk_release(sk); snd_cwnd = tp->snd_cwnd; /* oops! The verifier does not complain. */ The problem is the verifier did not scrub the register's states of the tcp_sock ptr (tp) after bpf_sk_release(sk). [ Note that when calling bpf_tcp_sock(sk), the sk is not always refcount-acquired. e.g. bpf_tcp_sock(skb->sk). The verifier works fine for this case. ] Currently, the verifier does not track if a helper's return ptr (in REG_0) is "carry"-ing one of its argument's refcount status. To carry this info, the reg1->id needs to be stored in reg0. One approach was tried, like "reg0->id = reg1->id", when calling "bpf_tcp_sock()". The main idea was to avoid adding another "ref_obj_id" for the same reg. However, overlapping the NULL marking and ref tracking purpose in one "id" does not work well: ref_sk = bpf_sk_lookup_tcp(); fullsock = bpf_sk_fullsock(ref_sk); tp = bpf_tcp_sock(ref_sk); if (!fullsock) { bpf_sk_release(ref_sk); return 0; } /* fullsock_reg->id is marked for NOT-NULL. * Same for tp_reg->id because they have the same id. */ /* oops. verifier did not complain about the missing !tp check */ snd_cwnd = tp->snd_cwnd; Hence, a new "ref_obj_id" is needed in "struct bpf_reg_state". With a new ref_obj_id, when bpf_sk_release(sk) is called, the verifier can scrub all reg states which has a ref_obj_id match. It is done with the changes in release_reg_references() in this patch. While fixing it, sk_to_full_sk() is removed from bpf_tcp_sock() and bpf_sk_fullsock() to avoid these helpers from returning another ptr. It will make bpf_sk_release(tp) possible: sk = bpf_sk_lookup_tcp(); /* if (!sk) ... */ tp = bpf_tcp_sock(sk); /* if (!tp) ... */ bpf_sk_release(tp); A separate helper "bpf_get_listener_sock()" will be added in a later patch to do sk_to_full_sk(). Misc change notes: - To allow bpf_sk_release(tp), the arg of bpf_sk_release() is changed from ARG_PTR_TO_SOCKET to ARG_PTR_TO_SOCK_COMMON. ARG_PTR_TO_SOCKET is removed from bpf.h since no helper is using it. - arg_type_is_refcounted() is renamed to arg_type_may_be_refcounted() because ARG_PTR_TO_SOCK_COMMON is the only one and skb->sk is not refcounted. All bpf_sk_release(), bpf_sk_fullsock() and bpf_tcp_sock() take ARG_PTR_TO_SOCK_COMMON. - check_refcount_ok() ensures is_acquire_function() cannot take arg_type_may_be_refcounted() as its argument. - The check_func_arg() can only allow one refcount-ed arg. It is guaranteed by check_refcount_ok() which ensures at most one arg can be refcounted. Hence, it is a verifier internal error if >1 refcount arg found in check_func_arg(). - In release_reference(), release_reference_state() is called first to ensure a match on "reg->ref_obj_id" can be found before scrubbing the reg states with release_reg_references(). - reg_is_refcounted() is no longer needed. 1. In mark_ptr_or_null_regs(), its usage is replaced by "ref_obj_id && ref_obj_id == id" because, when is_null == true, release_reference_state() should only be called on the ref_obj_id obtained by a acquire helper (i.e. is_acquire_function() == true). Otherwise, the following would happen: sk = bpf_sk_lookup_tcp(); /* if (!sk) { ... } */ fullsock = bpf_sk_fullsock(sk); if (!fullsock) { /* * release_reference_state(fullsock_reg->ref_obj_id) * where fullsock_reg->ref_obj_id == sk_reg->ref_obj_id. * * Hence, the following bpf_sk_release(sk) will fail * because the ref state has already been released in the * earlier release_reference_state(fullsock_reg->ref_obj_id). */ bpf_sk_release(sk); } 2. In release_reg_references(), the current reg_is_refcounted() call is unnecessary because the id check is enough. - The type_is_refcounted() and type_is_refcounted_or_null() are no longer needed also because reg_is_refcounted() is removed. Fixes: 655a51e536c0 ("bpf: Add struct bpf_tcp_sock and BPF_FUNC_tcp_sock") Reported-by: Lorenz Bauer Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 1 - include/linux/bpf_verifier.h | 40 +++++++++++++ kernel/bpf/verifier.c | 131 ++++++++++++++++++++++++------------------- net/core/filter.c | 6 +- 4 files changed, 115 insertions(+), 63 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a2132e09dc1c..f02367faa58d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -193,7 +193,6 @@ enum bpf_arg_type { ARG_PTR_TO_CTX, /* pointer to context */ ARG_ANYTHING, /* any (initialized) argument is ok */ - ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */ ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ }; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 69f7a3449eda..7d8228d1c898 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -66,6 +66,46 @@ struct bpf_reg_state { * same reference to the socket, to determine proper reference freeing. */ u32 id; + /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned + * from a pointer-cast helper, bpf_sk_fullsock() and + * bpf_tcp_sock(). + * + * Consider the following where "sk" is a reference counted + * pointer returned from "sk = bpf_sk_lookup_tcp();": + * + * 1: sk = bpf_sk_lookup_tcp(); + * 2: if (!sk) { return 0; } + * 3: fullsock = bpf_sk_fullsock(sk); + * 4: if (!fullsock) { bpf_sk_release(sk); return 0; } + * 5: tp = bpf_tcp_sock(fullsock); + * 6: if (!tp) { bpf_sk_release(sk); return 0; } + * 7: bpf_sk_release(sk); + * 8: snd_cwnd = tp->snd_cwnd; // verifier will complain + * + * After bpf_sk_release(sk) at line 7, both "fullsock" ptr and + * "tp" ptr should be invalidated also. In order to do that, + * the reg holding "fullsock" and "sk" need to remember + * the original refcounted ptr id (i.e. sk_reg->id) in ref_obj_id + * such that the verifier can reset all regs which have + * ref_obj_id matching the sk_reg->id. + * + * sk_reg->ref_obj_id is set to sk_reg->id at line 1. + * sk_reg->id will stay as NULL-marking purpose only. + * After NULL-marking is done, sk_reg->id can be reset to 0. + * + * After "fullsock = bpf_sk_fullsock(sk);" at line 3, + * fullsock_reg->ref_obj_id is set to sk_reg->ref_obj_id. + * + * After "tp = bpf_tcp_sock(fullsock);" at line 5, + * tp_reg->ref_obj_id is set to fullsock_reg->ref_obj_id + * which is the same as sk_reg->ref_obj_id. + * + * From the verifier perspective, if sk, fullsock and tp + * are not NULL, they are the same ptr with different + * reg->type. In particular, bpf_sk_release(tp) is also + * allowed and has the same effect as bpf_sk_release(sk). + */ + u32 ref_obj_id; /* For scalar types (SCALAR_VALUE), this represents our knowledge of * the actual value. * For pointer types, this represents the variable part of the offset diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ce166a002d16..86f9cd5d1c4e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -212,7 +212,7 @@ struct bpf_call_arg_meta { int access_size; s64 msize_smax_value; u64 msize_umax_value; - int ptr_id; + int ref_obj_id; int func_id; }; @@ -346,35 +346,15 @@ static bool reg_type_may_be_null(enum bpf_reg_type type) type == PTR_TO_TCP_SOCK_OR_NULL; } -static bool type_is_refcounted(enum bpf_reg_type type) -{ - return type == PTR_TO_SOCKET; -} - -static bool type_is_refcounted_or_null(enum bpf_reg_type type) -{ - return type == PTR_TO_SOCKET || type == PTR_TO_SOCKET_OR_NULL; -} - -static bool reg_is_refcounted(const struct bpf_reg_state *reg) -{ - return type_is_refcounted(reg->type); -} - static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) { return reg->type == PTR_TO_MAP_VALUE && map_value_has_spin_lock(reg->map_ptr); } -static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg) +static bool arg_type_may_be_refcounted(enum bpf_arg_type type) { - return type_is_refcounted_or_null(reg->type); -} - -static bool arg_type_is_refcounted(enum bpf_arg_type type) -{ - return type == ARG_PTR_TO_SOCKET; + return type == ARG_PTR_TO_SOCK_COMMON; } /* Determine whether the function releases some resources allocated by another @@ -392,6 +372,12 @@ static bool is_acquire_function(enum bpf_func_id func_id) func_id == BPF_FUNC_sk_lookup_udp; } +static bool is_ptr_cast_function(enum bpf_func_id func_id) +{ + return func_id == BPF_FUNC_tcp_sock || + func_id == BPF_FUNC_sk_fullsock; +} + /* string representation of 'enum bpf_reg_type' */ static const char * const reg_type_str[] = { [NOT_INIT] = "?", @@ -465,7 +451,8 @@ static void print_verifier_state(struct bpf_verifier_env *env, if (t == PTR_TO_STACK) verbose(env, ",call_%d", func(env, reg)->callsite); } else { - verbose(env, "(id=%d", reg->id); + verbose(env, "(id=%d ref_obj_id=%d", reg->id, + reg->ref_obj_id); if (t != SCALAR_VALUE) verbose(env, ",off=%d", reg->off); if (type_is_pkt_pointer(t)) @@ -2414,16 +2401,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */ if (!type_is_sk_pointer(type)) goto err_type; - } else if (arg_type == ARG_PTR_TO_SOCKET) { - expected_type = PTR_TO_SOCKET; - if (type != expected_type) - goto err_type; - if (meta->ptr_id || !reg->id) { - verbose(env, "verifier internal error: mismatched references meta=%d, reg=%d\n", - meta->ptr_id, reg->id); - return -EFAULT; + if (reg->ref_obj_id) { + if (meta->ref_obj_id) { + verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", + regno, reg->ref_obj_id, + meta->ref_obj_id); + return -EFAULT; + } + meta->ref_obj_id = reg->ref_obj_id; } - meta->ptr_id = reg->id; } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) { if (meta->func_id == BPF_FUNC_spin_lock) { if (process_spin_lock(env, regno, true)) @@ -2740,32 +2726,38 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn) return true; } -static bool check_refcount_ok(const struct bpf_func_proto *fn) +static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id) { int count = 0; - if (arg_type_is_refcounted(fn->arg1_type)) + if (arg_type_may_be_refcounted(fn->arg1_type)) count++; - if (arg_type_is_refcounted(fn->arg2_type)) + if (arg_type_may_be_refcounted(fn->arg2_type)) count++; - if (arg_type_is_refcounted(fn->arg3_type)) + if (arg_type_may_be_refcounted(fn->arg3_type)) count++; - if (arg_type_is_refcounted(fn->arg4_type)) + if (arg_type_may_be_refcounted(fn->arg4_type)) count++; - if (arg_type_is_refcounted(fn->arg5_type)) + if (arg_type_may_be_refcounted(fn->arg5_type)) count++; + /* A reference acquiring function cannot acquire + * another refcounted ptr. + */ + if (is_acquire_function(func_id) && count) + return false; + /* We only support one arg being unreferenced at the moment, * which is sufficient for the helper functions we have right now. */ return count <= 1; } -static int check_func_proto(const struct bpf_func_proto *fn) +static int check_func_proto(const struct bpf_func_proto *fn, int func_id) { return check_raw_mode_ok(fn) && check_arg_pair_ok(fn) && - check_refcount_ok(fn) ? 0 : -EINVAL; + check_refcount_ok(fn, func_id) ? 0 : -EINVAL; } /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END] @@ -2799,19 +2791,20 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) } static void release_reg_references(struct bpf_verifier_env *env, - struct bpf_func_state *state, int id) + struct bpf_func_state *state, + int ref_obj_id) { struct bpf_reg_state *regs = state->regs, *reg; int i; for (i = 0; i < MAX_BPF_REG; i++) - if (regs[i].id == id) + if (regs[i].ref_obj_id == ref_obj_id) mark_reg_unknown(env, regs, i); bpf_for_each_spilled_reg(i, state, reg) { if (!reg) continue; - if (reg_is_refcounted(reg) && reg->id == id) + if (reg->ref_obj_id == ref_obj_id) __mark_reg_unknown(reg); } } @@ -2820,15 +2813,20 @@ static void release_reg_references(struct bpf_verifier_env *env, * resources. Identify all copies of the same pointer and clear the reference. */ static int release_reference(struct bpf_verifier_env *env, - struct bpf_call_arg_meta *meta) + int ref_obj_id) { struct bpf_verifier_state *vstate = env->cur_state; + int err; int i; + err = release_reference_state(cur_func(env), ref_obj_id); + if (err) + return err; + for (i = 0; i <= vstate->curframe; i++) - release_reg_references(env, vstate->frame[i], meta->ptr_id); + release_reg_references(env, vstate->frame[i], ref_obj_id); - return release_reference_state(cur_func(env), meta->ptr_id); + return 0; } static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, @@ -3047,7 +3045,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn memset(&meta, 0, sizeof(meta)); meta.pkt_access = fn->pkt_access; - err = check_func_proto(fn); + err = check_func_proto(fn, func_id); if (err) { verbose(env, "kernel subsystem misconfigured func %s#%d\n", func_id_name(func_id), func_id); @@ -3093,7 +3091,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn return err; } } else if (is_release_function(func_id)) { - err = release_reference(env, &meta); + err = release_reference(env, meta.ref_obj_id); if (err) { verbose(env, "func %s#%d reference has not been acquired before\n", func_id_name(func_id), func_id); @@ -3154,8 +3152,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn if (id < 0) return id; - /* For release_reference() */ + /* For mark_ptr_or_null_reg() */ regs[BPF_REG_0].id = id; + /* For release_reference() */ + regs[BPF_REG_0].ref_obj_id = id; } else { /* For mark_ptr_or_null_reg() */ regs[BPF_REG_0].id = ++env->id_gen; @@ -3170,6 +3170,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn return -EINVAL; } + if (is_ptr_cast_function(func_id)) + /* For release_reference() */ + regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; + do_refine_retval_range(regs, fn->ret_type, func_id, &meta); err = check_map_func_compatibility(env, meta.map_ptr, func_id); @@ -4665,11 +4669,19 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) { reg->type = PTR_TO_TCP_SOCK; } - if (is_null || !(reg_is_refcounted(reg) || - reg_may_point_to_spin_lock(reg))) { - /* We don't need id from this point onwards anymore, - * thus we should better reset it, so that state - * pruning has chances to take effect. + if (is_null) { + /* We don't need id and ref_obj_id from this point + * onwards anymore, thus we should better reset it, + * so that state pruning has chances to take effect. + */ + reg->id = 0; + reg->ref_obj_id = 0; + } else if (!reg_may_point_to_spin_lock(reg)) { + /* For not-NULL ptr, reg->ref_obj_id will be reset + * in release_reg_references(). + * + * reg->id is still used by spin_lock ptr. Other + * than spin_lock ptr type, reg->id can be reset. */ reg->id = 0; } @@ -4684,11 +4696,16 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, { struct bpf_func_state *state = vstate->frame[vstate->curframe]; struct bpf_reg_state *reg, *regs = state->regs; + u32 ref_obj_id = regs[regno].ref_obj_id; u32 id = regs[regno].id; int i, j; - if (reg_is_refcounted_or_null(®s[regno]) && is_null) - release_reference_state(state, id); + if (ref_obj_id && ref_obj_id == id && is_null) + /* regs[regno] is in the " == NULL" branch. + * No one could have freed the reference state before + * doing the NULL check. + */ + WARN_ON_ONCE(release_reference_state(state, id)); for (i = 0; i < MAX_BPF_REG; i++) mark_ptr_or_null_reg(state, ®s[i], id, is_null); diff --git a/net/core/filter.c b/net/core/filter.c index f274620945ff..36b6afacf83c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1796,8 +1796,6 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = { BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk) { - sk = sk_to_full_sk(sk); - return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL; } @@ -5266,7 +5264,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = { .func = bpf_sk_release, .gpl_only = false, .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_SOCKET, + .arg1_type = ARG_PTR_TO_SOCK_COMMON, }; BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx, @@ -5407,8 +5405,6 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type, BPF_CALL_1(bpf_tcp_sock, struct sock *, sk) { - sk = sk_to_full_sk(sk); - if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP) return (unsigned long)sk; -- cgit From dbafd7ddd62369b2f3926ab847cbf8fc40e800b7 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 12 Mar 2019 10:23:04 -0700 Subject: bpf: Add bpf_get_listener_sock(struct bpf_sock *sk) helper Add a new helper "struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk)" which returns a bpf_sock in TCP_LISTEN state. It will trace back to the listener sk from a request_sock if possible. It returns NULL for all other cases. No reference is taken because the helper ensures the sk is in SOCK_RCU_FREE (where the TCP_LISTEN sock should be in). Hence, bpf_sk_release() is unnecessary and the verifier does not allow bpf_sk_release(listen_sk) to be called either. The following is also allowed because the bpf_prog is run under rcu_read_lock(): sk = bpf_sk_lookup_tcp(); /* if (!sk) { ... } */ listen_sk = bpf_get_listener_sock(sk); /* if (!listen_sk) { ... } */ bpf_sk_release(sk); src_port = listen_sk->src_port; /* Allowed */ Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 11 ++++++++++- net/core/filter.c | 21 +++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3c38ac9a92a7..983b25cb608d 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2366,6 +2366,14 @@ union bpf_attr { * current value is ect (ECN capable). Works with IPv6 and IPv4. * Return * 1 if set, 0 if not set. + * + * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk) + * Description + * Return a **struct bpf_sock** pointer in TCP_LISTEN state. + * bpf_sk_release() is unnecessary and not allowed. + * Return + * A **struct bpf_sock** pointer on success, or NULL in + * case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2465,7 +2473,8 @@ union bpf_attr { FN(spin_unlock), \ FN(sk_fullsock), \ FN(tcp_sock), \ - FN(skb_ecn_set_ce), + FN(skb_ecn_set_ce), \ + FN(get_listener_sock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/net/core/filter.c b/net/core/filter.c index 36b6afacf83c..647c63a7b25b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5418,6 +5418,23 @@ static const struct bpf_func_proto bpf_tcp_sock_proto = { .arg1_type = ARG_PTR_TO_SOCK_COMMON, }; +BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk) +{ + sk = sk_to_full_sk(sk); + + if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE)) + return (unsigned long)sk; + + return (unsigned long)NULL; +} + +static const struct bpf_func_proto bpf_get_listener_sock_proto = { + .func = bpf_get_listener_sock, + .gpl_only = false, + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, + .arg1_type = ARG_PTR_TO_SOCK_COMMON, +}; + BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb) { unsigned int iphdr_len; @@ -5603,6 +5620,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #ifdef CONFIG_INET case BPF_FUNC_tcp_sock: return &bpf_tcp_sock_proto; + case BPF_FUNC_get_listener_sock: + return &bpf_get_listener_sock_proto; case BPF_FUNC_skb_ecn_set_ce: return &bpf_skb_ecn_set_ce_proto; #endif @@ -5698,6 +5717,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_release_proto; case BPF_FUNC_tcp_sock: return &bpf_tcp_sock_proto; + case BPF_FUNC_get_listener_sock: + return &bpf_get_listener_sock_proto; #endif default: return bpf_base_func_proto(func_id); -- cgit From ef776a272b093fb52612e6d8f4b3e77f9bb49554 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 12 Mar 2019 10:23:06 -0700 Subject: bpf: Sync bpf.h to tools/ This patch sync the uapi bpf.h to tools/. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/bpf.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 3c38ac9a92a7..983b25cb608d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2366,6 +2366,14 @@ union bpf_attr { * current value is ect (ECN capable). Works with IPv6 and IPv4. * Return * 1 if set, 0 if not set. + * + * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk) + * Description + * Return a **struct bpf_sock** pointer in TCP_LISTEN state. + * bpf_sk_release() is unnecessary and not allowed. + * Return + * A **struct bpf_sock** pointer on success, or NULL in + * case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2465,7 +2473,8 @@ union bpf_attr { FN(spin_unlock), \ FN(sk_fullsock), \ FN(tcp_sock), \ - FN(skb_ecn_set_ce), + FN(skb_ecn_set_ce), \ + FN(get_listener_sock), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit From b55aa7b04bb42274b4a894020b5b2fa059c3527e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 12 Mar 2019 10:23:09 -0700 Subject: bpf: Test ref release issue in bpf_tcp_sock and bpf_sk_fullsock Adding verifier tests to ensure the ptr returned from bpf_tcp_sock() and bpf_sk_fullsock() cannot be accessed after bpf_sk_release() is called. A few of the tests are derived from a reproducer test by Lorenz Bauer. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/verifier/ref_tracking.c | 168 +++++++++++++++++++++ tools/testing/selftests/bpf/verifier/sock.c | 4 +- 2 files changed, 170 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c index 3ed3593bd8b6..923f2110072d 100644 --- a/tools/testing/selftests/bpf/verifier/ref_tracking.c +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -605,3 +605,171 @@ .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = ACCEPT, }, +{ + "reference tracking: use ptr from bpf_tcp_sock() after release", + .insns = { + BPF_SK_LOOKUP, + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_tcp_sock, snd_cwnd)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "invalid mem access", +}, +{ + "reference tracking: use ptr from bpf_sk_fullsock() after release", + .insns = { + BPF_SK_LOOKUP, + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "invalid mem access", +}, +{ + "reference tracking: use ptr from bpf_sk_fullsock(tp) after release", + .insns = { + BPF_SK_LOOKUP, + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "invalid mem access", +}, +{ + "reference tracking: use sk after bpf_sk_release(tp)", + .insns = { + BPF_SK_LOOKUP, + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "invalid mem access", +}, +{ + "reference tracking: use ptr from bpf_get_listener_sock() after bpf_sk_release(sk)", + .insns = { + BPF_SK_LOOKUP, + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_listener_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, src_port)), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, +}, +{ + "reference tracking: bpf_sk_release(listen_sk)", + .insns = { + BPF_SK_LOOKUP, + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_listener_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "reference has not been acquired before", +}, +{ + /* !bpf_sk_fullsock(sk) is checked but !bpf_tcp_sock(sk) is not checked */ + "reference tracking: tp->snd_cwnd after bpf_sk_fullsock(sk) and bpf_tcp_sock(sk)", + .insns = { + BPF_SK_LOOKUP, + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_fullsock), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_tcp_sock), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_8, offsetof(struct bpf_tcp_sock, snd_cwnd)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "invalid mem access", +}, diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c index 0ddfdf76aba5..416436231fab 100644 --- a/tools/testing/selftests/bpf/verifier/sock.c +++ b/tools/testing/selftests/bpf/verifier/sock.c @@ -342,7 +342,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "type=sock_common expected=sock", + .errstr = "reference has not been acquired before", }, { "bpf_sk_release(bpf_sk_fullsock(skb->sk))", @@ -380,5 +380,5 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "type=tcp_sock expected=sock", + .errstr = "reference has not been acquired before", }, -- cgit From 7681e7b2fbe2a78806423810c0d84dd230b96f94 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 12 Mar 2019 10:23:11 -0700 Subject: bpf: Add an example for bpf_get_listener_sock This patch adds an example in using the new helper bpf_get_listener_sock(). Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/bpf_helpers.h | 2 + .../selftests/bpf/progs/test_sock_fields_kern.c | 88 +++++++++++--- tools/testing/selftests/bpf/test_sock_fields.c | 134 ++++++++++++++++----- 3 files changed, 180 insertions(+), 44 deletions(-) diff --git a/tools/testing/selftests/bpf/bpf_helpers.h b/tools/testing/selftests/bpf/bpf_helpers.h index c9433a496d54..c81fc350f7ad 100644 --- a/tools/testing/selftests/bpf/bpf_helpers.h +++ b/tools/testing/selftests/bpf/bpf_helpers.h @@ -180,6 +180,8 @@ static struct bpf_sock *(*bpf_sk_fullsock)(struct bpf_sock *sk) = (void *) BPF_FUNC_sk_fullsock; static struct bpf_tcp_sock *(*bpf_tcp_sock)(struct bpf_sock *sk) = (void *) BPF_FUNC_tcp_sock; +static struct bpf_sock *(*bpf_get_listener_sock)(struct bpf_sock *sk) = + (void *) BPF_FUNC_get_listener_sock; static int (*bpf_skb_ecn_set_ce)(void *ctx) = (void *) BPF_FUNC_skb_ecn_set_ce; diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c index de1a43e8f610..37328f148538 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c @@ -8,38 +8,51 @@ #include "bpf_helpers.h" #include "bpf_endian.h" -enum bpf_array_idx { - SRV_IDX, - CLI_IDX, - __NR_BPF_ARRAY_IDX, +enum bpf_addr_array_idx { + ADDR_SRV_IDX, + ADDR_CLI_IDX, + __NR_BPF_ADDR_ARRAY_IDX, +}; + +enum bpf_result_array_idx { + EGRESS_SRV_IDX, + EGRESS_CLI_IDX, + INGRESS_LISTEN_IDX, + __NR_BPF_RESULT_ARRAY_IDX, +}; + +enum bpf_linum_array_idx { + EGRESS_LINUM_IDX, + INGRESS_LINUM_IDX, + __NR_BPF_LINUM_ARRAY_IDX, }; struct bpf_map_def SEC("maps") addr_map = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), .value_size = sizeof(struct sockaddr_in6), - .max_entries = __NR_BPF_ARRAY_IDX, + .max_entries = __NR_BPF_ADDR_ARRAY_IDX, }; struct bpf_map_def SEC("maps") sock_result_map = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), .value_size = sizeof(struct bpf_sock), - .max_entries = __NR_BPF_ARRAY_IDX, + .max_entries = __NR_BPF_RESULT_ARRAY_IDX, }; struct bpf_map_def SEC("maps") tcp_sock_result_map = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), .value_size = sizeof(struct bpf_tcp_sock), - .max_entries = __NR_BPF_ARRAY_IDX, + .max_entries = __NR_BPF_RESULT_ARRAY_IDX, }; struct bpf_map_def SEC("maps") linum_map = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(__u32), .value_size = sizeof(__u32), - .max_entries = 1, + .max_entries = __NR_BPF_LINUM_ARRAY_IDX, }; static bool is_loopback6(__u32 *a6) @@ -100,18 +113,20 @@ static void tpcpy(struct bpf_tcp_sock *dst, #define RETURN { \ linum = __LINE__; \ - bpf_map_update_elem(&linum_map, &idx0, &linum, 0); \ + bpf_map_update_elem(&linum_map, &linum_idx, &linum, 0); \ return 1; \ } SEC("cgroup_skb/egress") -int read_sock_fields(struct __sk_buff *skb) +int egress_read_sock_fields(struct __sk_buff *skb) { - __u32 srv_idx = SRV_IDX, cli_idx = CLI_IDX, idx; + __u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx; struct sockaddr_in6 *srv_sa6, *cli_sa6; struct bpf_tcp_sock *tp, *tp_ret; struct bpf_sock *sk, *sk_ret; - __u32 linum, idx0 = 0; + __u32 linum, linum_idx; + + linum_idx = EGRESS_LINUM_IDX; sk = skb->sk; if (!sk || sk->state == 10) @@ -132,14 +147,55 @@ int read_sock_fields(struct __sk_buff *skb) RETURN; if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port)) - idx = srv_idx; + result_idx = EGRESS_SRV_IDX; else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port)) - idx = cli_idx; + result_idx = EGRESS_CLI_IDX; else RETURN; - sk_ret = bpf_map_lookup_elem(&sock_result_map, &idx); - tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &idx); + sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx); + tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx); + if (!sk_ret || !tp_ret) + RETURN; + + skcpy(sk_ret, sk); + tpcpy(tp_ret, tp); + + RETURN; +} + +SEC("cgroup_skb/ingress") +int ingress_read_sock_fields(struct __sk_buff *skb) +{ + __u32 srv_idx = ADDR_SRV_IDX, result_idx = INGRESS_LISTEN_IDX; + struct bpf_tcp_sock *tp, *tp_ret; + struct bpf_sock *sk, *sk_ret; + struct sockaddr_in6 *srv_sa6; + __u32 linum, linum_idx; + + linum_idx = INGRESS_LINUM_IDX; + + sk = skb->sk; + if (!sk || sk->family != AF_INET6 || !is_loopback6(sk->src_ip6)) + RETURN; + + srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx); + if (!srv_sa6 || sk->src_port != bpf_ntohs(srv_sa6->sin6_port)) + RETURN; + + if (sk->state != 10 && sk->state != 12) + RETURN; + + sk = bpf_get_listener_sock(sk); + if (!sk) + RETURN; + + tp = bpf_tcp_sock(sk); + if (!tp) + RETURN; + + sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx); + tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx); if (!sk_ret || !tp_ret) RETURN; diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c index bc8943938bf5..dcae7f664dce 100644 --- a/tools/testing/selftests/bpf/test_sock_fields.c +++ b/tools/testing/selftests/bpf/test_sock_fields.c @@ -16,10 +16,23 @@ #include "cgroup_helpers.h" #include "bpf_rlimit.h" -enum bpf_array_idx { - SRV_IDX, - CLI_IDX, - __NR_BPF_ARRAY_IDX, +enum bpf_addr_array_idx { + ADDR_SRV_IDX, + ADDR_CLI_IDX, + __NR_BPF_ADDR_ARRAY_IDX, +}; + +enum bpf_result_array_idx { + EGRESS_SRV_IDX, + EGRESS_CLI_IDX, + INGRESS_LISTEN_IDX, + __NR_BPF_RESULT_ARRAY_IDX, +}; + +enum bpf_linum_array_idx { + EGRESS_LINUM_IDX, + INGRESS_LINUM_IDX, + __NR_BPF_LINUM_ARRAY_IDX, }; #define CHECK(condition, tag, format...) ({ \ @@ -41,8 +54,16 @@ static int linum_map_fd; static int addr_map_fd; static int tp_map_fd; static int sk_map_fd; -static __u32 srv_idx = SRV_IDX; -static __u32 cli_idx = CLI_IDX; + +static __u32 addr_srv_idx = ADDR_SRV_IDX; +static __u32 addr_cli_idx = ADDR_CLI_IDX; + +static __u32 egress_srv_idx = EGRESS_SRV_IDX; +static __u32 egress_cli_idx = EGRESS_CLI_IDX; +static __u32 ingress_listen_idx = INGRESS_LISTEN_IDX; + +static __u32 egress_linum_idx = EGRESS_LINUM_IDX; +static __u32 ingress_linum_idx = INGRESS_LINUM_IDX; static void init_loopback6(struct sockaddr_in6 *sa6) { @@ -93,29 +114,46 @@ static void print_tp(const struct bpf_tcp_sock *tp) static void check_result(void) { - struct bpf_tcp_sock srv_tp, cli_tp; - struct bpf_sock srv_sk, cli_sk; - __u32 linum, idx0 = 0; + struct bpf_tcp_sock srv_tp, cli_tp, listen_tp; + struct bpf_sock srv_sk, cli_sk, listen_sk; + __u32 ingress_linum, egress_linum; int err; - err = bpf_map_lookup_elem(linum_map_fd, &idx0, &linum); + err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx, + &egress_linum); CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(sk_map_fd, &srv_idx, &srv_sk); - CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &srv_idx)", + err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx, + &ingress_linum); + CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", + "err:%d errno:%d", err, errno); + + err = bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx, &srv_sk); + CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx)", + "err:%d errno:%d", err, errno); + err = bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx, &srv_tp); + CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx)", + "err:%d errno:%d", err, errno); + + err = bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx, &cli_sk); + CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx)", "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(tp_map_fd, &srv_idx, &srv_tp); - CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &srv_idx)", + err = bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx, &cli_tp); + CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx)", "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(sk_map_fd, &cli_idx, &cli_sk); - CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &cli_idx)", + err = bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx, &listen_sk); + CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx)", "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(tp_map_fd, &cli_idx, &cli_tp); - CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &cli_idx)", + err = bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx, &listen_tp); + CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx)", "err:%d errno:%d", err, errno); + printf("listen_sk: "); + print_sk(&listen_sk); + printf("\n"); + printf("srv_sk: "); print_sk(&srv_sk); printf("\n"); @@ -124,6 +162,10 @@ static void check_result(void) print_sk(&cli_sk); printf("\n"); + printf("listen_tp: "); + print_tp(&listen_tp); + printf("\n"); + printf("srv_tp: "); print_tp(&srv_tp); printf("\n"); @@ -132,6 +174,19 @@ static void check_result(void) print_tp(&cli_tp); printf("\n"); + CHECK(listen_sk.state != 10 || + listen_sk.family != AF_INET6 || + listen_sk.protocol != IPPROTO_TCP || + memcmp(listen_sk.src_ip6, &in6addr_loopback, + sizeof(listen_sk.src_ip6)) || + listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] || + listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] || + listen_sk.src_port != ntohs(srv_sa6.sin6_port) || + listen_sk.dst_port, + "Unexpected listen_sk", + "Check listen_sk output. ingress_linum:%u", + ingress_linum); + CHECK(srv_sk.state == 10 || !srv_sk.state || srv_sk.family != AF_INET6 || @@ -142,7 +197,8 @@ static void check_result(void) sizeof(srv_sk.dst_ip6)) || srv_sk.src_port != ntohs(srv_sa6.sin6_port) || srv_sk.dst_port != cli_sa6.sin6_port, - "Unexpected srv_sk", "Check srv_sk output. linum:%u", linum); + "Unexpected srv_sk", "Check srv_sk output. egress_linum:%u", + egress_linum); CHECK(cli_sk.state == 10 || !cli_sk.state || @@ -154,21 +210,31 @@ static void check_result(void) sizeof(cli_sk.dst_ip6)) || cli_sk.src_port != ntohs(cli_sa6.sin6_port) || cli_sk.dst_port != srv_sa6.sin6_port, - "Unexpected cli_sk", "Check cli_sk output. linum:%u", linum); + "Unexpected cli_sk", "Check cli_sk output. egress_linum:%u", + egress_linum); + + CHECK(listen_tp.data_segs_out || + listen_tp.data_segs_in || + listen_tp.total_retrans || + listen_tp.bytes_acked, + "Unexpected listen_tp", "Check listen_tp output. ingress_linum:%u", + ingress_linum); CHECK(srv_tp.data_segs_out != 1 || srv_tp.data_segs_in || srv_tp.snd_cwnd != 10 || srv_tp.total_retrans || srv_tp.bytes_acked != DATA_LEN, - "Unexpected srv_tp", "Check srv_tp output. linum:%u", linum); + "Unexpected srv_tp", "Check srv_tp output. egress_linum:%u", + egress_linum); CHECK(cli_tp.data_segs_out || cli_tp.data_segs_in != 1 || cli_tp.snd_cwnd != 10 || cli_tp.total_retrans || cli_tp.bytes_received != DATA_LEN, - "Unexpected cli_tp", "Check cli_tp output. linum:%u", linum); + "Unexpected cli_tp", "Check cli_tp output. egress_linum:%u", + egress_linum); } static void test(void) @@ -211,10 +277,10 @@ static void test(void) err, errno); /* Update addr_map with srv_sa6 and cli_sa6 */ - err = bpf_map_update_elem(addr_map_fd, &srv_idx, &srv_sa6, 0); + err = bpf_map_update_elem(addr_map_fd, &addr_srv_idx, &srv_sa6, 0); CHECK(err, "map_update", "err:%d errno:%d", err, errno); - err = bpf_map_update_elem(addr_map_fd, &cli_idx, &cli_sa6, 0); + err = bpf_map_update_elem(addr_map_fd, &addr_cli_idx, &cli_sa6, 0); CHECK(err, "map_update", "err:%d errno:%d", err, errno); /* Connect from cli_sa6 to srv_sa6 */ @@ -273,9 +339,9 @@ int main(int argc, char **argv) struct bpf_prog_load_attr attr = { .file = "test_sock_fields_kern.o", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - .expected_attach_type = BPF_CGROUP_INET_EGRESS, }; - int cgroup_fd, prog_fd, err; + int cgroup_fd, egress_fd, ingress_fd, err; + struct bpf_program *ingress_prog; struct bpf_object *obj; struct bpf_map *map; @@ -293,12 +359,24 @@ int main(int argc, char **argv) err = join_cgroup(TEST_CGROUP); CHECK(err, "join_cgroup", "err:%d errno:%d", err, errno); - err = bpf_prog_load_xattr(&attr, &obj, &prog_fd); + err = bpf_prog_load_xattr(&attr, &obj, &egress_fd); CHECK(err, "bpf_prog_load_xattr()", "err:%d", err); - err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0); + ingress_prog = bpf_object__find_program_by_title(obj, + "cgroup_skb/ingress"); + CHECK(!ingress_prog, + "bpf_object__find_program_by_title(cgroup_skb/ingress)", + "not found"); + ingress_fd = bpf_program__fd(ingress_prog); + + err = bpf_prog_attach(egress_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0); CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)", "err:%d errno%d", err, errno); + + err = bpf_prog_attach(ingress_fd, cgroup_fd, + BPF_CGROUP_INET_INGRESS, 0); + CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_INGRESS)", + "err:%d errno%d", err, errno); close(cgroup_fd); map = bpf_object__find_map_by_name(obj, "addr_map"); -- cgit From f6cab793d4a70808e4946baa8f5df4ea9adacc82 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 14 Mar 2019 17:40:16 +0000 Subject: MIPS: Remove custom MIPS32 __kernel_fsid_t type For MIPS32 kernels we have a custom definition of __kernel_fsid_t. This differs from the asm-generic version used by all other architectures & MIPS64 in one way - it declares the val field as an array of long, rather than an array of int. Since int & long have identical size & alignment when targeting MIPS32 anyway, this makes little sense. Beyond the pointlessness this causes problems for code which prints entries from the val array, for example the fanotify_encode_fid() function [1]. If such code uses a format specified suited to an int then it encounters compiler warnings when building for MIPS32, such as: In file included from include/linux/kernel.h:14:0, from include/linux/list.h:9, from include/linux/preempt.h:11, from include/linux/spinlock.h:51, from include/linux/fdtable.h:11, from fs/notify/fanotify/fanotify.c:3: fs/notify/fanotify/fanotify.c: In function 'fanotify_encode_fid': include/linux/kern_levels.h:5:18: warning: format '%x' expects argument of type 'unsigned int', but argument 2 has type 'long int' [-Wformat=] Remove the custom __kernel_fsid_t definition & make use of the asm-generic version which will have an identical layout in memory anyway, in order to remove the inconsistency with other architectures. One possible regression this could cause if is any code is attempting to print entries from the val array with a long-sized format specifier, in which case it would begin seeing compiler warnings when built against kernel headers including this change. Since such code is exceedingly rare, and would have to be MIPS32-specific to expect a long, this seems to be a problem that it's extremely unlikely anyone will encounter. [1] https://lore.kernel.org/linux-mips/CAOQ4uxiEkczB7PNCXegFC-eYb9zAGaio_o=OgHAJHFd7eavBxA@mail.gmail.com/T/#mb43103277c79ef06b884359209e817db1c136140 Signed-off-by: Paul Burton Cc: Amir Goldstein Cc: Arnd Bergmann Cc: Jan Kara Cc: linux-arch@vger.kernel.org Cc: linux-mips@vger.kernel.org Cc: linux-kernel@vger.kernel.org --- arch/mips/include/uapi/asm/posix_types.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/mips/include/uapi/asm/posix_types.h b/arch/mips/include/uapi/asm/posix_types.h index 6aa49c10f88f..f0ccb5b90ce9 100644 --- a/arch/mips/include/uapi/asm/posix_types.h +++ b/arch/mips/include/uapi/asm/posix_types.h @@ -21,13 +21,6 @@ typedef long __kernel_daddr_t; #define __kernel_daddr_t __kernel_daddr_t -#if (_MIPS_SZLONG == 32) -typedef struct { - long val[2]; -} __kernel_fsid_t; -#define __kernel_fsid_t __kernel_fsid_t -#endif - #include #endif /* _ASM_POSIX_TYPES_H */ -- cgit From 9768095ba97ce946838e8210f0b44f2fd36ec31d Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 10 Mar 2019 17:44:09 -0700 Subject: btf: resolve enum fwds in btf_dedup GCC and clang support enum forward declarations as an extension. Such forward-declared enums will be represented as normal BTF_KIND_ENUM types with vlen=0. This patch adds ability to resolve such enums to their corresponding fully defined enums. This helps to avoid duplicated BTF type graphs which only differ by some types referencing forward-declared enum vs full enum. One such example in kernel is enum irqchip_irq_state, defined in include/linux/interrupt.h and forward-declared in include/linux/irq.h. This causes entire struct task_struct and all referenced types to be duplicated in btf_dedup output. This patch eliminates such duplication cases. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/btf.c | 51 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 1b8d8cdd3575..87e3020ac1bc 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1602,16 +1602,12 @@ static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2) /* Calculate type signature hash of ENUM. */ static __u32 btf_hash_enum(struct btf_type *t) { - struct btf_enum *member = (struct btf_enum *)(t + 1); - __u32 vlen = BTF_INFO_VLEN(t->info); - __u32 h = btf_hash_common(t); - int i; + __u32 h; - for (i = 0; i < vlen; i++) { - h = hash_combine(h, member->name_off); - h = hash_combine(h, member->val); - member++; - } + /* don't hash vlen and enum members to support enum fwd resolving */ + h = hash_combine(0, t->name_off); + h = hash_combine(h, t->info & ~0xffff); + h = hash_combine(h, t->size); return h; } @@ -1637,6 +1633,22 @@ static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) return true; } +static inline bool btf_is_enum_fwd(struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM && + BTF_INFO_VLEN(t->info) == 0; +} + +static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2) +{ + if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2)) + return btf_equal_enum(t1, t2); + /* ignore vlen when comparing */ + return t1->name_off == t2->name_off && + (t1->info & ~0xffff) == (t2->info & ~0xffff) && + t1->size == t2->size; +} + /* * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs, * as referenced type IDs equivalence is established separately during type @@ -1860,6 +1872,17 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) new_id = cand_node->type_id; break; } + if (d->opts.dont_resolve_fwds) + continue; + if (btf_compat_enum(t, cand)) { + if (btf_is_enum_fwd(t)) { + /* resolve fwd to full enum */ + new_id = cand_node->type_id; + break; + } + /* resolve canonical enum fwd to full enum */ + d->map[cand_node->type_id] = type_id; + } } break; @@ -2084,15 +2107,15 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, return fwd_kind == real_kind; } - if (cand_type->info != canon_type->info) - return 0; - switch (cand_kind) { case BTF_KIND_INT: return btf_equal_int(cand_type, canon_type); case BTF_KIND_ENUM: - return btf_equal_enum(cand_type, canon_type); + if (d->opts.dont_resolve_fwds) + return btf_equal_enum(cand_type, canon_type); + else + return btf_compat_enum(cand_type, canon_type); case BTF_KIND_FWD: return btf_equal_common(cand_type, canon_type); @@ -2103,6 +2126,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, case BTF_KIND_PTR: case BTF_KIND_TYPEDEF: case BTF_KIND_FUNC: + if (cand_type->info != canon_type->info) + return 0; return btf_dedup_is_equiv(d, cand_type->type, canon_type->type); case BTF_KIND_ARRAY: { -- cgit From 8fd7a61aa556ad518e897f58264a2e67f9c527f5 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Sun, 10 Mar 2019 17:44:10 -0700 Subject: selftests/bpf: add fwd enum resolution test for btf_dedup This patch adds test verifying new btf_dedup logic of resolving forward-declared enums. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/test_btf.c | 44 ++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c index 38797aa627a7..23e3b314ca60 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/test_btf.c @@ -5874,6 +5874,50 @@ const struct btf_dedup_test dedup_tests[] = { .dont_resolve_fwds = false, }, }, +{ + .descr = "dedup: enum fwd resolution", + .input = { + .raw_types = { + /* [1] fwd enum 'e1' before full enum */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4), + /* [2] full enum 'e1' after fwd */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 123), + /* [3] full enum 'e2' before fwd */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(4), 456), + /* [4] fwd enum 'e2' after full enum */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4), + /* [5] incompatible fwd enum with different size */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1), + /* [6] incompatible full enum with different value */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 321), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"), + }, + .expect = { + .raw_types = { + /* [1] full enum 'e1' */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 123), + /* [2] full enum 'e2' */ + BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(4), 456), + /* [3] incompatible fwd enum with different size */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1), + /* [4] incompatible full enum with different value */ + BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4), + BTF_ENUM_ENC(NAME_NTH(2), 321), + BTF_END_RAW, + }, + BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"), + }, + .opts = { + .dont_resolve_fwds = false, + }, +}, }; -- cgit From 62369db2df8d1edfa040878203b446e023a16802 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 14 Mar 2019 12:38:39 +0000 Subject: bpf: fix documentation for eBPF helpers Another round of minor fixes for the documentation of the BPF helpers located in the UAPI bpf.h header file. Changes include: - Moving around description of some helpers, to keep the descriptions in the same order as helpers are declared (bpf_map_push_elem(), leftover from commit 90b1023f68c7 ("bpf: fix documentation for eBPF helpers"), bpf_rc_keydown(), and bpf_skb_ancestor_cgroup_id()). - Fixing typos ("contex" -> "context"). - Harmonising return types ("void* " -> "void *", "uint64_t" -> "u64"). - Addition of the "bpf_" prefix to bpf_get_storage(). - Light additions of RST markup on some keywords. - Empty line deletion between description and return value for bpf_tcp_sock(). - Edit for the description for bpf_skb_ecn_set_ce() (capital letters, acronym expansion, no effect if ECT not set, more details on return value). Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 128 ++++++++++++++++++++++++----------------------- 1 file changed, 65 insertions(+), 63 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 983b25cb608d..4465d00d3493 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -502,16 +502,6 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) - * Description - * Push an element *value* in *map*. *flags* is one of: - * - * **BPF_EXIST** - * If the queue/stack is full, the oldest element is removed to - * make room for this. - * Return - * 0 on success, or a negative error in case of failure. - * * int bpf_probe_read(void *dst, u32 size, const void *src) * Description * For tracing programs, safely attempt to read *size* bytes from @@ -1435,14 +1425,14 @@ union bpf_attr { * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) * Description * Equivalent to bpf_get_socket_cookie() helper that accepts - * *skb*, but gets socket from **struct bpf_sock_addr** contex. + * *skb*, but gets socket from **struct bpf_sock_addr** context. * Return * A 8-byte long non-decreasing number. * * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) * Description * Equivalent to bpf_get_socket_cookie() helper that accepts - * *skb*, but gets socket from **struct bpf_sock_ops** contex. + * *skb*, but gets socket from **struct bpf_sock_ops** context. * Return * A 8-byte long non-decreasing number. * @@ -2098,52 +2088,52 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) + * int bpf_rc_repeat(void *ctx) * Description * This helper is used in programs implementing IR decoding, to - * report a successfully decoded key press with *scancode*, - * *toggle* value in the given *protocol*. The scancode will be - * translated to a keycode using the rc keymap, and reported as - * an input key down event. After a period a key up event is - * generated. This period can be extended by calling either - * **bpf_rc_keydown**\ () again with the same values, or calling - * **bpf_rc_repeat**\ (). + * report a successfully decoded repeat key message. This delays + * the generation of a key up event for previously generated + * key down event. * - * Some protocols include a toggle bit, in case the button was - * released and pressed again between consecutive scancodes. + * Some IR protocols like NEC have a special IR message for + * repeating last button, for when a button is held down. * * The *ctx* should point to the lirc sample as passed into * the program. * - * The *protocol* is the decoded protocol number (see - * **enum rc_proto** for some predefined values). - * * This helper is only available is the kernel was compiled with * the **CONFIG_BPF_LIRC_MODE2** configuration option set to * "**y**". * Return * 0 * - * int bpf_rc_repeat(void *ctx) + * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) * Description * This helper is used in programs implementing IR decoding, to - * report a successfully decoded repeat key message. This delays - * the generation of a key up event for previously generated - * key down event. + * report a successfully decoded key press with *scancode*, + * *toggle* value in the given *protocol*. The scancode will be + * translated to a keycode using the rc keymap, and reported as + * an input key down event. After a period a key up event is + * generated. This period can be extended by calling either + * **bpf_rc_keydown**\ () again with the same values, or calling + * **bpf_rc_repeat**\ (). * - * Some IR protocols like NEC have a special IR message for - * repeating last button, for when a button is held down. + * Some protocols include a toggle bit, in case the button was + * released and pressed again between consecutive scancodes. * * The *ctx* should point to the lirc sample as passed into * the program. * + * The *protocol* is the decoded protocol number (see + * **enum rc_proto** for some predefined values). + * * This helper is only available is the kernel was compiled with * the **CONFIG_BPF_LIRC_MODE2** configuration option set to * "**y**". * Return * 0 * - * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb) + * u64 bpf_skb_cgroup_id(struct sk_buff *skb) * Description * Return the cgroup v2 id of the socket associated with the *skb*. * This is roughly similar to the **bpf_get_cgroup_classid**\ () @@ -2159,30 +2149,12 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level) - * Description - * Return id of cgroup v2 that is ancestor of cgroup associated - * with the *skb* at the *ancestor_level*. The root cgroup is at - * *ancestor_level* zero and each step down the hierarchy - * increments the level. If *ancestor_level* == level of cgroup - * associated with *skb*, then return value will be same as that - * of **bpf_skb_cgroup_id**\ (). - * - * The helper is useful to implement policies based on cgroups - * that are upper in hierarchy than immediate cgroup associated - * with *skb*. - * - * The format of returned id and helper limitations are same as in - * **bpf_skb_cgroup_id**\ (). - * Return - * The id is returned or 0 in case the id could not be retrieved. - * * u64 bpf_get_current_cgroup_id(void) * Return * A 64-bit integer containing the current cgroup id based * on the cgroup within which the current task is running. * - * void* get_local_storage(void *map, u64 flags) + * void *bpf_get_local_storage(void *map, u64 flags) * Description * Get the pointer to the local storage area. * The type and the size of the local storage is defined @@ -2209,6 +2181,24 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * + * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level) + * Description + * Return id of cgroup v2 that is ancestor of cgroup associated + * with the *skb* at the *ancestor_level*. The root cgroup is at + * *ancestor_level* zero and each step down the hierarchy + * increments the level. If *ancestor_level* == level of cgroup + * associated with *skb*, then return value will be same as that + * of **bpf_skb_cgroup_id**\ (). + * + * The helper is useful to implement policies based on cgroups + * that are upper in hierarchy than immediate cgroup associated + * with *skb*. + * + * The format of returned id and helper limitations are same as in + * **bpf_skb_cgroup_id**\ (). + * Return + * The id is returned or 0 in case the id could not be retrieved. + * * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for TCP socket matching *tuple*, optionally in a child @@ -2289,6 +2279,16 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * Description + * Push an element *value* in *map*. *flags* is one of: + * + * **BPF_EXIST** + * If the queue/stack is full, the oldest element is + * removed to make room for this. + * Return + * 0 on success, or a negative error in case of failure. + * * int bpf_map_pop_elem(struct bpf_map *map, void *value) * Description * Pop an element from *map*. @@ -2346,33 +2346,35 @@ union bpf_attr { * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) * Description * This helper gets a **struct bpf_sock** pointer such - * that all the fields in bpf_sock can be accessed. + * that all the fields in this **bpf_sock** can be accessed. * Return - * A **struct bpf_sock** pointer on success, or NULL in + * A **struct bpf_sock** pointer on success, or **NULL** in * case of failure. * * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk) * Description * This helper gets a **struct bpf_tcp_sock** pointer from a * **struct bpf_sock** pointer. - * * Return - * A **struct bpf_tcp_sock** pointer on success, or NULL in + * A **struct bpf_tcp_sock** pointer on success, or **NULL** in * case of failure. * * int bpf_skb_ecn_set_ce(struct sk_buf *skb) - * Description - * Sets ECN of IP header to ce (congestion encountered) if - * current value is ect (ECN capable). Works with IPv6 and IPv4. - * Return - * 1 if set, 0 if not set. + * Description + * Set ECN (Explicit Congestion Notification) field of IP header + * to **CE** (Congestion Encountered) if current value is **ECT** + * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6 + * and IPv4. + * Return + * 1 if the **CE** flag is set (either by the current helper call + * or because it was already present), 0 if it is not set. * * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk) * Description - * Return a **struct bpf_sock** pointer in TCP_LISTEN state. - * bpf_sk_release() is unnecessary and not allowed. + * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state. + * **bpf_sk_release**\ () is unnecessary and not allowed. * Return - * A **struct bpf_sock** pointer on success, or NULL in + * A **struct bpf_sock** pointer on success, or **NULL** in * case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ -- cgit From 0eb0978528d47699edd091dc2c337952ad8da436 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 14 Mar 2019 12:38:40 +0000 Subject: bpf: add documentation for helpers bpf_spin_lock(), bpf_spin_unlock() Add documentation for the BPF spinlock-related helpers to the doc in bpf.h. I added the constraints and restrictions coming with the use of spinlocks for BPF: not all of it is directly related to the use of the helper, but I thought it would be nice for users to find them in the man page. This list of restrictions is nearly a verbatim copy of the list in Alexei's commit log for those helpers. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4465d00d3493..929c8e537a14 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2343,6 +2343,61 @@ union bpf_attr { * Return * 0 * + * int bpf_spin_lock(struct bpf_spin_lock *lock) + * Description + * Acquire a spinlock represented by the pointer *lock*, which is + * stored as part of a value of a map. Taking the lock allows to + * safely update the rest of the fields in that value. The + * spinlock can (and must) later be released with a call to + * **bpf_spin_unlock**\ (\ *lock*\ ). + * + * Spinlocks in BPF programs come with a number of restrictions + * and constraints: + * + * * **bpf_spin_lock** objects are only allowed inside maps of + * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this + * list could be extended in the future). + * * BTF description of the map is mandatory. + * * The BPF program can take ONE lock at a time, since taking two + * or more could cause dead locks. + * * Only one **struct bpf_spin_lock** is allowed per map element. + * * When the lock is taken, calls (either BPF to BPF or helpers) + * are not allowed. + * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not + * allowed inside a spinlock-ed region. + * * The BPF program MUST call **bpf_spin_unlock**\ () to release + * the lock, on all execution paths, before it returns. + * * The BPF program can access **struct bpf_spin_lock** only via + * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ () + * helpers. Loading or storing data into the **struct + * bpf_spin_lock** *lock*\ **;** field of a map is not allowed. + * * To use the **bpf_spin_lock**\ () helper, the BTF description + * of the map value must be a struct and have **struct + * bpf_spin_lock** *anyname*\ **;** field at the top level. + * Nested lock inside another struct is not allowed. + * * The **struct bpf_spin_lock** *lock* field in a map value must + * be aligned on a multiple of 4 bytes in that value. + * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy + * the **bpf_spin_lock** field to user space. + * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from + * a BPF program, do not update the **bpf_spin_lock** field. + * * **bpf_spin_lock** cannot be on the stack or inside a + * networking packet (it can only be inside of a map values). + * * **bpf_spin_lock** is available to root only. + * * Tracing programs and socket filter programs cannot use + * **bpf_spin_lock**\ () due to insufficient preemption checks + * (but this may change in the future). + * * **bpf_spin_lock** is not allowed in inner maps of map-in-map. + * Return + * 0 + * + * int bpf_spin_unlock(struct bpf_spin_lock *lock) + * Description + * Release the *lock* previously locked by a call to + * **bpf_spin_lock**\ (\ *lock*\ ). + * Return + * 0 + * * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) * Description * This helper gets a **struct bpf_sock** pointer such -- cgit From ea6eced00e4b28821804eee24e80d9528f48972a Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Thu, 14 Mar 2019 12:38:41 +0000 Subject: tools: bpf: synchronise BPF UAPI header with tools Synchronise the bpf.h header under tools, to report the latest fixes and additions to the documentation for the BPF helpers. Signed-off-by: Quentin Monnet Reviewed-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- tools/include/uapi/linux/bpf.h | 183 +++++++++++++++++++++++++++-------------- 1 file changed, 120 insertions(+), 63 deletions(-) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 983b25cb608d..929c8e537a14 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -502,16 +502,6 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) - * Description - * Push an element *value* in *map*. *flags* is one of: - * - * **BPF_EXIST** - * If the queue/stack is full, the oldest element is removed to - * make room for this. - * Return - * 0 on success, or a negative error in case of failure. - * * int bpf_probe_read(void *dst, u32 size, const void *src) * Description * For tracing programs, safely attempt to read *size* bytes from @@ -1435,14 +1425,14 @@ union bpf_attr { * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx) * Description * Equivalent to bpf_get_socket_cookie() helper that accepts - * *skb*, but gets socket from **struct bpf_sock_addr** contex. + * *skb*, but gets socket from **struct bpf_sock_addr** context. * Return * A 8-byte long non-decreasing number. * * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx) * Description * Equivalent to bpf_get_socket_cookie() helper that accepts - * *skb*, but gets socket from **struct bpf_sock_ops** contex. + * *skb*, but gets socket from **struct bpf_sock_ops** context. * Return * A 8-byte long non-decreasing number. * @@ -2098,52 +2088,52 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) + * int bpf_rc_repeat(void *ctx) * Description * This helper is used in programs implementing IR decoding, to - * report a successfully decoded key press with *scancode*, - * *toggle* value in the given *protocol*. The scancode will be - * translated to a keycode using the rc keymap, and reported as - * an input key down event. After a period a key up event is - * generated. This period can be extended by calling either - * **bpf_rc_keydown**\ () again with the same values, or calling - * **bpf_rc_repeat**\ (). + * report a successfully decoded repeat key message. This delays + * the generation of a key up event for previously generated + * key down event. * - * Some protocols include a toggle bit, in case the button was - * released and pressed again between consecutive scancodes. + * Some IR protocols like NEC have a special IR message for + * repeating last button, for when a button is held down. * * The *ctx* should point to the lirc sample as passed into * the program. * - * The *protocol* is the decoded protocol number (see - * **enum rc_proto** for some predefined values). - * * This helper is only available is the kernel was compiled with * the **CONFIG_BPF_LIRC_MODE2** configuration option set to * "**y**". * Return * 0 * - * int bpf_rc_repeat(void *ctx) + * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) * Description * This helper is used in programs implementing IR decoding, to - * report a successfully decoded repeat key message. This delays - * the generation of a key up event for previously generated - * key down event. + * report a successfully decoded key press with *scancode*, + * *toggle* value in the given *protocol*. The scancode will be + * translated to a keycode using the rc keymap, and reported as + * an input key down event. After a period a key up event is + * generated. This period can be extended by calling either + * **bpf_rc_keydown**\ () again with the same values, or calling + * **bpf_rc_repeat**\ (). * - * Some IR protocols like NEC have a special IR message for - * repeating last button, for when a button is held down. + * Some protocols include a toggle bit, in case the button was + * released and pressed again between consecutive scancodes. * * The *ctx* should point to the lirc sample as passed into * the program. * + * The *protocol* is the decoded protocol number (see + * **enum rc_proto** for some predefined values). + * * This helper is only available is the kernel was compiled with * the **CONFIG_BPF_LIRC_MODE2** configuration option set to * "**y**". * Return * 0 * - * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb) + * u64 bpf_skb_cgroup_id(struct sk_buff *skb) * Description * Return the cgroup v2 id of the socket associated with the *skb*. * This is roughly similar to the **bpf_get_cgroup_classid**\ () @@ -2159,30 +2149,12 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level) - * Description - * Return id of cgroup v2 that is ancestor of cgroup associated - * with the *skb* at the *ancestor_level*. The root cgroup is at - * *ancestor_level* zero and each step down the hierarchy - * increments the level. If *ancestor_level* == level of cgroup - * associated with *skb*, then return value will be same as that - * of **bpf_skb_cgroup_id**\ (). - * - * The helper is useful to implement policies based on cgroups - * that are upper in hierarchy than immediate cgroup associated - * with *skb*. - * - * The format of returned id and helper limitations are same as in - * **bpf_skb_cgroup_id**\ (). - * Return - * The id is returned or 0 in case the id could not be retrieved. - * * u64 bpf_get_current_cgroup_id(void) * Return * A 64-bit integer containing the current cgroup id based * on the cgroup within which the current task is running. * - * void* get_local_storage(void *map, u64 flags) + * void *bpf_get_local_storage(void *map, u64 flags) * Description * Get the pointer to the local storage area. * The type and the size of the local storage is defined @@ -2209,6 +2181,24 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * + * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level) + * Description + * Return id of cgroup v2 that is ancestor of cgroup associated + * with the *skb* at the *ancestor_level*. The root cgroup is at + * *ancestor_level* zero and each step down the hierarchy + * increments the level. If *ancestor_level* == level of cgroup + * associated with *skb*, then return value will be same as that + * of **bpf_skb_cgroup_id**\ (). + * + * The helper is useful to implement policies based on cgroups + * that are upper in hierarchy than immediate cgroup associated + * with *skb*. + * + * The format of returned id and helper limitations are same as in + * **bpf_skb_cgroup_id**\ (). + * Return + * The id is returned or 0 in case the id could not be retrieved. + * * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags) * Description * Look for TCP socket matching *tuple*, optionally in a child @@ -2289,6 +2279,16 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * + * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * Description + * Push an element *value* in *map*. *flags* is one of: + * + * **BPF_EXIST** + * If the queue/stack is full, the oldest element is + * removed to make room for this. + * Return + * 0 on success, or a negative error in case of failure. + * * int bpf_map_pop_elem(struct bpf_map *map, void *value) * Description * Pop an element from *map*. @@ -2343,36 +2343,93 @@ union bpf_attr { * Return * 0 * + * int bpf_spin_lock(struct bpf_spin_lock *lock) + * Description + * Acquire a spinlock represented by the pointer *lock*, which is + * stored as part of a value of a map. Taking the lock allows to + * safely update the rest of the fields in that value. The + * spinlock can (and must) later be released with a call to + * **bpf_spin_unlock**\ (\ *lock*\ ). + * + * Spinlocks in BPF programs come with a number of restrictions + * and constraints: + * + * * **bpf_spin_lock** objects are only allowed inside maps of + * types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this + * list could be extended in the future). + * * BTF description of the map is mandatory. + * * The BPF program can take ONE lock at a time, since taking two + * or more could cause dead locks. + * * Only one **struct bpf_spin_lock** is allowed per map element. + * * When the lock is taken, calls (either BPF to BPF or helpers) + * are not allowed. + * * The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not + * allowed inside a spinlock-ed region. + * * The BPF program MUST call **bpf_spin_unlock**\ () to release + * the lock, on all execution paths, before it returns. + * * The BPF program can access **struct bpf_spin_lock** only via + * the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ () + * helpers. Loading or storing data into the **struct + * bpf_spin_lock** *lock*\ **;** field of a map is not allowed. + * * To use the **bpf_spin_lock**\ () helper, the BTF description + * of the map value must be a struct and have **struct + * bpf_spin_lock** *anyname*\ **;** field at the top level. + * Nested lock inside another struct is not allowed. + * * The **struct bpf_spin_lock** *lock* field in a map value must + * be aligned on a multiple of 4 bytes in that value. + * * Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy + * the **bpf_spin_lock** field to user space. + * * Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from + * a BPF program, do not update the **bpf_spin_lock** field. + * * **bpf_spin_lock** cannot be on the stack or inside a + * networking packet (it can only be inside of a map values). + * * **bpf_spin_lock** is available to root only. + * * Tracing programs and socket filter programs cannot use + * **bpf_spin_lock**\ () due to insufficient preemption checks + * (but this may change in the future). + * * **bpf_spin_lock** is not allowed in inner maps of map-in-map. + * Return + * 0 + * + * int bpf_spin_unlock(struct bpf_spin_lock *lock) + * Description + * Release the *lock* previously locked by a call to + * **bpf_spin_lock**\ (\ *lock*\ ). + * Return + * 0 + * * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk) * Description * This helper gets a **struct bpf_sock** pointer such - * that all the fields in bpf_sock can be accessed. + * that all the fields in this **bpf_sock** can be accessed. * Return - * A **struct bpf_sock** pointer on success, or NULL in + * A **struct bpf_sock** pointer on success, or **NULL** in * case of failure. * * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk) * Description * This helper gets a **struct bpf_tcp_sock** pointer from a * **struct bpf_sock** pointer. - * * Return - * A **struct bpf_tcp_sock** pointer on success, or NULL in + * A **struct bpf_tcp_sock** pointer on success, or **NULL** in * case of failure. * * int bpf_skb_ecn_set_ce(struct sk_buf *skb) - * Description - * Sets ECN of IP header to ce (congestion encountered) if - * current value is ect (ECN capable). Works with IPv6 and IPv4. - * Return - * 1 if set, 0 if not set. + * Description + * Set ECN (Explicit Congestion Notification) field of IP header + * to **CE** (Congestion Encountered) if current value is **ECT** + * (ECN Capable Transport). Otherwise, do nothing. Works with IPv6 + * and IPv4. + * Return + * 1 if the **CE** flag is set (either by the current helper call + * or because it was already present), 0 if it is not set. * * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk) * Description - * Return a **struct bpf_sock** pointer in TCP_LISTEN state. - * bpf_sk_release() is unnecessary and not allowed. + * Return a **struct bpf_sock** pointer in **TCP_LISTEN** state. + * **bpf_sk_release**\ () is unnecessary and not allowed. * Return - * A **struct bpf_sock** pointer on success, or NULL in + * A **struct bpf_sock** pointer on success, or **NULL** in * case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ -- cgit From fa30dde38aa8628c73a6dded7cb0bba38c27b576 Mon Sep 17 00:00:00 2001 From: Jiufei Xue Date: Thu, 14 Mar 2019 23:19:22 -0400 Subject: ext4: fix NULL pointer dereference while journal is aborted We see the following NULL pointer dereference while running xfstests generic/475: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 PGD 8000000c84bad067 P4D 8000000c84bad067 PUD c84e62067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 7 PID: 9886 Comm: fsstress Kdump: loaded Not tainted 5.0.0-rc8 #10 RIP: 0010:ext4_do_update_inode+0x4ec/0x760 ... Call Trace: ? jbd2_journal_get_write_access+0x42/0x50 ? __ext4_journal_get_write_access+0x2c/0x70 ? ext4_truncate+0x186/0x3f0 ext4_mark_iloc_dirty+0x61/0x80 ext4_mark_inode_dirty+0x62/0x1b0 ext4_truncate+0x186/0x3f0 ? unmap_mapping_pages+0x56/0x100 ext4_setattr+0x817/0x8b0 notify_change+0x1df/0x430 do_truncate+0x5e/0x90 ? generic_permission+0x12b/0x1a0 This is triggered because the NULL pointer handle->h_transaction was dereferenced in function ext4_update_inode_fsync_trans(). I found that the h_transaction was set to NULL in jbd2__journal_restart but failed to attached to a new transaction while the journal is aborted. Fix this by checking the handle before updating the inode. Fixes: b436b9bef84d ("ext4: Wait for proper transaction commit on fsync") Signed-off-by: Jiufei Xue Signed-off-by: Theodore Ts'o Reviewed-by: Joseph Qi Cc: stable@kernel.org --- fs/ext4/ext4_jbd2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index 15b6dd733780..df908ef79cce 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -384,7 +384,7 @@ static inline void ext4_update_inode_fsync_trans(handle_t *handle, { struct ext4_inode_info *ei = EXT4_I(inode); - if (ext4_handle_valid(handle)) { + if (ext4_handle_valid(handle) && !is_handle_aborted(handle)) { ei->i_sync_tid = handle->h_transaction->t_tid; if (datasync) ei->i_datasync_tid = handle->h_transaction->t_tid; -- cgit From 372a03e01853f860560eade508794dd274e9b390 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Thu, 14 Mar 2019 23:20:25 -0400 Subject: ext4: fix data corruption caused by unaligned direct AIO Ext4 needs to serialize unaligned direct AIO because the zeroing of partial blocks of two competing unaligned AIOs can result in data corruption. However it decides not to serialize if the potentially unaligned aio is past i_size with the rationale that no pending writes are possible past i_size. Unfortunately if the i_size is not block aligned and the second unaligned write lands past i_size, but still into the same block, it has the potential of corrupting the previous unaligned write to the same block. This is (very simplified) reproducer from Frank // 41472 = (10 * 4096) + 512 // 37376 = 41472 - 4096 ftruncate(fd, 41472); io_prep_pwrite(iocbs[0], fd, buf[0], 4096, 37376); io_prep_pwrite(iocbs[1], fd, buf[1], 4096, 41472); io_submit(io_ctx, 1, &iocbs[1]); io_submit(io_ctx, 1, &iocbs[2]); io_getevents(io_ctx, 2, 2, events, NULL); Without this patch the 512B range from 40960 up to the start of the second unaligned write (41472) is going to be zeroed overwriting the data written by the first write. This is a data corruption. 00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00009200 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 * 0000a000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 0000a200 31 31 31 31 31 31 31 31 31 31 31 31 31 31 31 31 With this patch the data corruption is avoided because we will recognize the unaligned_aio and wait for the unwritten extent conversion. 00000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 00009200 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 30 * 0000a200 31 31 31 31 31 31 31 31 31 31 31 31 31 31 31 31 * 0000b200 Reported-by: Frank Sorenson Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o Fixes: e9e3bcecf44c ("ext4: serialize unaligned asynchronous DIO") Cc: stable@vger.kernel.org --- fs/ext4/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 69d65d49837b..98ec11f69cd4 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -125,7 +125,7 @@ ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos) struct super_block *sb = inode->i_sb; int blockmask = sb->s_blocksize - 1; - if (pos >= i_size_read(inode)) + if (pos >= ALIGN(i_size_read(inode), sb->s_blocksize)) return 0; if ((pos | iov_iter_alignment(from)) & blockmask) -- cgit From 1dc1097ff60e4105216da7cd0aa99032b039a994 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 14 Mar 2019 23:46:05 -0400 Subject: ext4: avoid panic during forced reboot When admin calls "reboot -f" - i.e., does a hard system reboot by directly calling reboot(2) - ext4 filesystem mounted with errors=panic can panic the system. This happens because the underlying device gets disabled without unmounting the filesystem and thus some syscall running in parallel to reboot(2) can result in the filesystem getting IO errors. This is somewhat surprising to the users so try improve the behavior by switching to errors=remount-ro behavior when the system is running reboot(2). Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/super.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 6e4cac646345..1a5729d8f9ec 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -430,6 +430,12 @@ static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn) spin_unlock(&sbi->s_md_lock); } +static bool system_going_down(void) +{ + return system_state == SYSTEM_HALT || system_state == SYSTEM_POWER_OFF + || system_state == SYSTEM_RESTART; +} + /* Deal with the reporting of failure conditions on a filesystem such as * inconsistencies detected or read IO failures. * @@ -460,7 +466,12 @@ static void ext4_handle_error(struct super_block *sb) if (journal) jbd2_journal_abort(journal, -EIO); } - if (test_opt(sb, ERRORS_RO)) { + /* + * We force ERRORS_RO behavior when system is rebooting. Otherwise we + * could panic during 'reboot -f' as the underlying device got already + * disabled. + */ + if (test_opt(sb, ERRORS_RO) || system_going_down()) { ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); /* * Make sure updated value of ->s_mount_flags will be visible @@ -468,8 +479,7 @@ static void ext4_handle_error(struct super_block *sb) */ smp_wmb(); sb->s_flags |= SB_RDONLY; - } - if (test_opt(sb, ERRORS_PANIC)) { + } else if (test_opt(sb, ERRORS_PANIC)) { if (EXT4_SB(sb)->s_journal && !(EXT4_SB(sb)->s_journal->j_flags & JBD2_REC_ERR)) return; -- cgit From 7cf77140777364d77b2b6e392e7e081a205a08c5 Mon Sep 17 00:00:00 2001 From: Jason Yan Date: Thu, 14 Mar 2019 23:51:13 -0400 Subject: ext4: remove useless ext4_pin_inode() This function is never used from the beginning (and is commented out); let's remove it. Signed-off-by: Jason Yan Signed-off-by: Theodore Ts'o --- fs/ext4/inode.c | 30 ------------------------------ 1 file changed, 30 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index f84cf62fd290..e5014a6f76e2 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -6082,36 +6082,6 @@ out: return; } -#if 0 -/* - * Bind an inode's backing buffer_head into this transaction, to prevent - * it from being flushed to disk early. Unlike - * ext4_reserve_inode_write, this leaves behind no bh reference and - * returns no iloc structure, so the caller needs to repeat the iloc - * lookup to mark the inode dirty later. - */ -static int ext4_pin_inode(handle_t *handle, struct inode *inode) -{ - struct ext4_iloc iloc; - - int err = 0; - if (handle) { - err = ext4_get_inode_loc(inode, &iloc); - if (!err) { - BUFFER_TRACE(iloc.bh, "get_write_access"); - err = jbd2_journal_get_write_access(handle, iloc.bh); - if (!err) - err = ext4_handle_dirty_metadata(handle, - NULL, - iloc.bh); - brelse(iloc.bh); - } - } - ext4_std_error(inode->i_sb, err); - return err; -} -#endif - int ext4_change_inode_journal_flag(struct inode *inode, int val) { journal_t *journal; -- cgit From e65ef56db4945fb18a0d522e056c02ddf939e644 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Mar 2019 10:16:44 -0600 Subject: io_uring: use regular request ref counts Get rid of the special casing of "normal" requests not having any references to the io_kiocb. We initialize the ref count to 2, one for the submission side, and one or the completion side. Signed-off-by: Jens Axboe --- fs/io_uring.c | 54 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 5d99376d2369..9071fca118a4 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -411,7 +411,8 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, req->ctx = ctx; req->flags = 0; - refcount_set(&req->refs, 0); + /* one is dropped after submission, the other at completion */ + refcount_set(&req->refs, 2); return req; out: io_ring_drop_ctx_refs(ctx, 1); @@ -429,10 +430,14 @@ static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr) static void io_free_req(struct io_kiocb *req) { - if (!refcount_read(&req->refs) || refcount_dec_and_test(&req->refs)) { - io_ring_drop_ctx_refs(req->ctx, 1); - kmem_cache_free(req_cachep, req); - } + io_ring_drop_ctx_refs(req->ctx, 1); + kmem_cache_free(req_cachep, req); +} + +static void io_put_req(struct io_kiocb *req) +{ + if (refcount_dec_and_test(&req->refs)) + io_free_req(req); } /* @@ -453,7 +458,8 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, io_cqring_fill_event(ctx, req->user_data, req->error, 0); - reqs[to_free++] = req; + if (refcount_dec_and_test(&req->refs)) + reqs[to_free++] = req; (*nr_events)++; /* @@ -616,7 +622,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2) io_fput(req); io_cqring_add_event(req->ctx, req->user_data, res, 0); - io_free_req(req); + io_put_req(req); } static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) @@ -1083,7 +1089,7 @@ static int io_nop(struct io_kiocb *req, u64 user_data) io_fput(req); } io_cqring_add_event(ctx, user_data, err, 0); - io_free_req(req); + io_put_req(req); return 0; } @@ -1146,7 +1152,7 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe, io_fput(req); io_cqring_add_event(req->ctx, sqe->user_data, ret, 0); - io_free_req(req); + io_put_req(req); return 0; } @@ -1204,7 +1210,7 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe) spin_unlock_irq(&ctx->completion_lock); io_cqring_add_event(req->ctx, sqe->user_data, ret, 0); - io_free_req(req); + io_put_req(req); return 0; } @@ -1212,7 +1218,7 @@ static void io_poll_complete(struct io_kiocb *req, __poll_t mask) { io_cqring_add_event(req->ctx, req->user_data, mangle_poll(mask), 0); io_fput(req); - io_free_req(req); + io_put_req(req); } static void io_poll_complete_work(struct work_struct *work) @@ -1346,9 +1352,6 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe) INIT_LIST_HEAD(&poll->wait.entry); init_waitqueue_func_entry(&poll->wait, io_poll_wake); - /* one for removal from waitqueue, one for this function */ - refcount_set(&req->refs, 2); - mask = vfs_poll(poll->file, &ipt.pt) & poll->events; if (unlikely(!poll->head)) { /* we did not manage to set up a waitqueue, done */ @@ -1380,13 +1383,12 @@ out: * Drop one of our refs to this req, __io_submit_sqe() will * drop the other one since we're returning an error. */ - io_free_req(req); + io_put_req(req); return ipt.error; } if (mask) io_poll_complete(req, mask); - io_free_req(req); return 0; } @@ -1524,10 +1526,13 @@ restart: break; cond_resched(); } while (1); + + /* drop submission reference */ + io_put_req(req); } if (ret) { io_cqring_add_event(ctx, sqe->user_data, ret, 0); - io_free_req(req); + io_put_req(req); } /* async context always use a copy of the sqe */ @@ -1649,11 +1654,22 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, INIT_WORK(&req->work, io_sq_wq_submit_work); queue_work(ctx->sqo_wq, &req->work); } - ret = 0; + + /* + * Queued up for async execution, worker will release + * submit reference when the iocb is actually + * submitted. + */ + return 0; } } + + /* drop submission reference */ + io_put_req(req); + + /* and drop final reference, if we failed */ if (ret) - io_free_req(req); + io_put_req(req); return ret; } -- cgit From d64264d6218e6892edd832dc3a5a5857c2856c53 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 15 Mar 2019 00:15:32 -0400 Subject: ext4: add missing brelse() in add_new_gdb_meta_bg() Currently in add_new_gdb_meta_bg() there is a missing brelse of gdb_bh in case ext4_journal_get_write_access() fails. Additionally kvfree() is missing in the same error path. Fix it by moving the ext4_journal_get_write_access() before the ext4 sb update as Ted suggested and release n_group_desc and gdb_bh in case it fails. Fixes: 61a9c11e5e7a ("ext4: add missing brelse() add_new_gdb_meta_bg()'s error path") Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o --- fs/ext4/resize.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 3d9b18505c0c..90061c3d048b 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -932,11 +932,18 @@ static int add_new_gdb_meta_bg(struct super_block *sb, memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); n_group_desc[gdb_num] = gdb_bh; + + BUFFER_TRACE(gdb_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, gdb_bh); + if (err) { + kvfree(n_group_desc); + brelse(gdb_bh); + return err; + } + EXT4_SB(sb)->s_group_desc = n_group_desc; EXT4_SB(sb)->s_gdb_count++; kvfree(o_group_desc); - BUFFER_TRACE(gdb_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, gdb_bh); return err; } -- cgit From 6c7328400e0488f7d49e19e02290ba343b6811b2 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 15 Mar 2019 00:22:28 -0400 Subject: ext4: report real fs size after failed resize Currently when the file system resize using ext4_resize_fs() fails it will report into log that "resized filesystem to ". However this may not be true in the case of failure. Use the current block count as returned by ext4_blocks_count() to report the block count. Additionally, report a warning that "error occurred during file system resize" Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o --- fs/ext4/resize.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 90061c3d048b..e7ae26e36c9c 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -2080,6 +2080,10 @@ out: free_flex_gd(flex_gd); if (resize_inode != NULL) iput(resize_inode); - ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", n_blocks_count); + if (err) + ext4_warning(sb, "error (%d) occurred during " + "file system resize", err); + ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", + ext4_blocks_count(es)); return err; } -- cgit From e0c5c576d5074b5bb7b1b4b59848c25ceb521331 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Mar 2019 10:18:47 -0600 Subject: io_uring: make io_read/write return an integer The callers all convert to an integer, and we only return 0/-ERROR anyway. Signed-off-by: Jens Axboe --- fs/io_uring.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 9071fca118a4..caf39663466f 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -893,7 +893,7 @@ static int io_import_iovec(struct io_ring_ctx *ctx, int rw, opcode = READ_ONCE(sqe->opcode); if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) { - ssize_t ret = io_import_fixed(ctx, rw, sqe, iter); + int ret = io_import_fixed(ctx, rw, sqe, iter); *iovec = NULL; return ret; } @@ -951,15 +951,15 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len) async_list->io_end = io_end; } -static ssize_t io_read(struct io_kiocb *req, const struct sqe_submit *s, - bool force_nonblock, struct io_submit_state *state) +static int io_read(struct io_kiocb *req, const struct sqe_submit *s, + bool force_nonblock, struct io_submit_state *state) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct kiocb *kiocb = &req->rw; struct iov_iter iter; struct file *file; size_t iov_count; - ssize_t ret; + int ret; ret = io_prep_rw(req, s, force_nonblock, state); if (ret) @@ -1004,15 +1004,15 @@ out_fput: return ret; } -static ssize_t io_write(struct io_kiocb *req, const struct sqe_submit *s, - bool force_nonblock, struct io_submit_state *state) +static int io_write(struct io_kiocb *req, const struct sqe_submit *s, + bool force_nonblock, struct io_submit_state *state) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct kiocb *kiocb = &req->rw; struct iov_iter iter; struct file *file; size_t iov_count; - ssize_t ret; + int ret; ret = io_prep_rw(req, s, force_nonblock, state); if (ret) @@ -1396,8 +1396,7 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, const struct sqe_submit *s, bool force_nonblock, struct io_submit_state *state) { - ssize_t ret; - int opcode; + int ret, opcode; if (unlikely(s->index >= ctx->sq_entries)) return -EINVAL; @@ -1623,7 +1622,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, struct io_submit_state *state) { struct io_kiocb *req; - ssize_t ret; + int ret; /* enforce forwards compatibility on users */ if (unlikely(s->sqe->flags & ~IOSQE_FIXED_FILE)) -- cgit From d530a402a114efcf6d2b88d7f628856dade5b90b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 13 Mar 2019 12:15:01 -0600 Subject: io_uring: add prepped flag We currently use the fact that if ->ki_filp is already set, then we've done the prep. In preparation for moving the file assignment earlier, use a separate flag to tell whether the request has been prepped for IO or not. Signed-off-by: Jens Axboe --- fs/io_uring.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index caf39663466f..d259e8a6cb2e 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -214,6 +214,7 @@ struct io_kiocb { #define REQ_F_IOPOLL_COMPLETED 2 /* polled IO has completed */ #define REQ_F_FIXED_FILE 4 /* ctx owns file */ #define REQ_F_SEQ_PREV 8 /* sequential with previous */ +#define REQ_F_PREPPED 16 /* prep already done */ u64 user_data; u64 error; @@ -741,7 +742,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, int fd, ret; /* For -EAGAIN retry, everything is already prepped */ - if (kiocb->ki_filp) + if (req->flags & REQ_F_PREPPED) return 0; flags = READ_ONCE(sqe->flags); @@ -799,6 +800,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, } kiocb->ki_complete = io_complete_rw; } + req->flags |= REQ_F_PREPPED; return 0; out_fput: if (!(flags & IOSQE_FIXED_FILE)) { @@ -1099,8 +1101,8 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe) unsigned flags; int fd; - /* Prep already done */ - if (req->rw.ki_filp) + /* Prep already done (EAGAIN retry) */ + if (req->flags & REQ_F_PREPPED) return 0; if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) @@ -1122,6 +1124,7 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe) return -EBADF; } + req->flags |= REQ_F_PREPPED; return 0; } @@ -1632,8 +1635,6 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, if (unlikely(!req)) return -EAGAIN; - req->rw.ki_filp = NULL; - ret = __io_submit_sqe(ctx, req, s, true, state); if (ret == -EAGAIN) { struct io_uring_sqe *sqe_copy; -- cgit From 09bb839434bd845c01da3d159b0c126fe7fa90da Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 13 Mar 2019 12:39:28 -0600 Subject: io_uring: fix fget/fput handling This isn't a straight port of commit 84c4e1f89fef for aio.c, since io_uring doesn't use files in exactly the same way. But it's pretty close. See the commit message for that commit. This essentially fixes a use-after-free with the poll command handling, but it takes cue from Linus's approach to just simplifying the file handling. We move the setup of the file into a higher level location, so the individual commands don't have to deal with it. And then we release the reference when we free the associated io_kiocb. Fixes: 221c5eb23382 ("io_uring: add support for IORING_OP_POLL") Signed-off-by: Jens Axboe --- fs/io_uring.c | 230 +++++++++++++++++++++++++--------------------------------- 1 file changed, 97 insertions(+), 133 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index d259e8a6cb2e..c08fa62e1978 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -189,6 +189,10 @@ struct sqe_submit { bool needs_fixed_file; }; +/* + * First field must be the file pointer in all the + * iocb unions! See also 'struct kiocb' in + */ struct io_poll_iocb { struct file *file; struct wait_queue_head *head; @@ -198,8 +202,15 @@ struct io_poll_iocb { struct wait_queue_entry wait; }; +/* + * NOTE! Each of the iocb union members has the file pointer + * as the first entry in their struct definition. So you can + * access the file pointer through any of the sub-structs, + * or directly as just 'ki_filp' in this struct. + */ struct io_kiocb { union { + struct file *file; struct kiocb rw; struct io_poll_iocb poll; }; @@ -431,6 +442,8 @@ static void io_free_req_many(struct io_ring_ctx *ctx, void **reqs, int *nr) static void io_free_req(struct io_kiocb *req) { + if (req->file && !(req->flags & REQ_F_FIXED_FILE)) + fput(req->file); io_ring_drop_ctx_refs(req->ctx, 1); kmem_cache_free(req_cachep, req); } @@ -448,45 +461,34 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, struct list_head *done) { void *reqs[IO_IOPOLL_BATCH]; - int file_count, to_free; - struct file *file = NULL; struct io_kiocb *req; + int to_free; - file_count = to_free = 0; + to_free = 0; while (!list_empty(done)) { req = list_first_entry(done, struct io_kiocb, list); list_del(&req->list); io_cqring_fill_event(ctx, req->user_data, req->error, 0); - - if (refcount_dec_and_test(&req->refs)) - reqs[to_free++] = req; (*nr_events)++; - /* - * Batched puts of the same file, to avoid dirtying the - * file usage count multiple times, if avoidable. - */ - if (!(req->flags & REQ_F_FIXED_FILE)) { - if (!file) { - file = req->rw.ki_filp; - file_count = 1; - } else if (file == req->rw.ki_filp) { - file_count++; + if (refcount_dec_and_test(&req->refs)) { + /* If we're not using fixed files, we have to pair the + * completion part with the file put. Use regular + * completions for those, only batch free for fixed + * file. + */ + if (req->flags & REQ_F_FIXED_FILE) { + reqs[to_free++] = req; + if (to_free == ARRAY_SIZE(reqs)) + io_free_req_many(ctx, reqs, &to_free); } else { - fput_many(file, file_count); - file = req->rw.ki_filp; - file_count = 1; + io_free_req(req); } } - - if (to_free == ARRAY_SIZE(reqs)) - io_free_req_many(ctx, reqs, &to_free); } - io_commit_cqring(ctx); - if (file) - fput_many(file, file_count); + io_commit_cqring(ctx); io_free_req_many(ctx, reqs, &to_free); } @@ -609,19 +611,12 @@ static void kiocb_end_write(struct kiocb *kiocb) } } -static void io_fput(struct io_kiocb *req) -{ - if (!(req->flags & REQ_F_FIXED_FILE)) - fput(req->rw.ki_filp); -} - static void io_complete_rw(struct kiocb *kiocb, long res, long res2) { struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw); kiocb_end_write(kiocb); - io_fput(req); io_cqring_add_event(req->ctx, req->user_data, res, 0); io_put_req(req); } @@ -738,31 +733,18 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, const struct io_uring_sqe *sqe = s->sqe; struct io_ring_ctx *ctx = req->ctx; struct kiocb *kiocb = &req->rw; - unsigned ioprio, flags; - int fd, ret; + unsigned ioprio; + int ret; + if (!req->file) + return -EBADF; /* For -EAGAIN retry, everything is already prepped */ if (req->flags & REQ_F_PREPPED) return 0; - flags = READ_ONCE(sqe->flags); - fd = READ_ONCE(sqe->fd); + if (force_nonblock && !io_file_supports_async(req->file)) + force_nonblock = false; - if (flags & IOSQE_FIXED_FILE) { - if (unlikely(!ctx->user_files || - (unsigned) fd >= ctx->nr_user_files)) - return -EBADF; - kiocb->ki_filp = ctx->user_files[fd]; - req->flags |= REQ_F_FIXED_FILE; - } else { - if (s->needs_fixed_file) - return -EBADF; - kiocb->ki_filp = io_file_get(state, fd); - if (unlikely(!kiocb->ki_filp)) - return -EBADF; - if (force_nonblock && !io_file_supports_async(kiocb->ki_filp)) - force_nonblock = false; - } kiocb->ki_pos = READ_ONCE(sqe->off); kiocb->ki_flags = iocb_flags(kiocb->ki_filp); kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp)); @@ -771,7 +753,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, if (ioprio) { ret = ioprio_check_cap(ioprio); if (ret) - goto out_fput; + return ret; kiocb->ki_ioprio = ioprio; } else @@ -779,39 +761,26 @@ static int io_prep_rw(struct io_kiocb *req, const struct sqe_submit *s, ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); if (unlikely(ret)) - goto out_fput; + return ret; if (force_nonblock) { kiocb->ki_flags |= IOCB_NOWAIT; req->flags |= REQ_F_FORCE_NONBLOCK; } if (ctx->flags & IORING_SETUP_IOPOLL) { - ret = -EOPNOTSUPP; if (!(kiocb->ki_flags & IOCB_DIRECT) || !kiocb->ki_filp->f_op->iopoll) - goto out_fput; + return -EOPNOTSUPP; req->error = 0; kiocb->ki_flags |= IOCB_HIPRI; kiocb->ki_complete = io_complete_rw_iopoll; } else { - if (kiocb->ki_flags & IOCB_HIPRI) { - ret = -EINVAL; - goto out_fput; - } + if (kiocb->ki_flags & IOCB_HIPRI) + return -EINVAL; kiocb->ki_complete = io_complete_rw; } req->flags |= REQ_F_PREPPED; return 0; -out_fput: - if (!(flags & IOSQE_FIXED_FILE)) { - /* - * in case of error, we didn't use this file reference. drop it. - */ - if (state) - state->used_refs--; - io_file_put(state, kiocb->ki_filp); - } - return ret; } static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) @@ -968,16 +937,14 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s, return ret; file = kiocb->ki_filp; - ret = -EBADF; if (unlikely(!(file->f_mode & FMODE_READ))) - goto out_fput; - ret = -EINVAL; + return -EBADF; if (unlikely(!file->f_op->read_iter)) - goto out_fput; + return -EINVAL; ret = io_import_iovec(req->ctx, READ, s, &iovec, &iter); if (ret) - goto out_fput; + return ret; iov_count = iov_iter_count(&iter); ret = rw_verify_area(READ, file, &kiocb->ki_pos, iov_count); @@ -999,10 +966,6 @@ static int io_read(struct io_kiocb *req, const struct sqe_submit *s, } } kfree(iovec); -out_fput: - /* Hold on to the file for -EAGAIN */ - if (unlikely(ret && ret != -EAGAIN)) - io_fput(req); return ret; } @@ -1020,17 +983,15 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s, if (ret) return ret; - ret = -EBADF; file = kiocb->ki_filp; if (unlikely(!(file->f_mode & FMODE_WRITE))) - goto out_fput; - ret = -EINVAL; + return -EBADF; if (unlikely(!file->f_op->write_iter)) - goto out_fput; + return -EINVAL; ret = io_import_iovec(req->ctx, WRITE, s, &iovec, &iter); if (ret) - goto out_fput; + return ret; iov_count = iov_iter_count(&iter); @@ -1062,10 +1023,6 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s, } out_free: kfree(iovec); -out_fput: - /* Hold on to the file for -EAGAIN */ - if (unlikely(ret && ret != -EAGAIN)) - io_fput(req); return ret; } @@ -1080,16 +1037,6 @@ static int io_nop(struct io_kiocb *req, u64 user_data) if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; - /* - * Twilight zone - it's possible that someone issued an opcode that - * has a file attached, then got -EAGAIN on submission, and changed - * the sqe before we retried it from async context. Avoid dropping - * a file reference for this malicious case, and flag the error. - */ - if (req->rw.ki_filp) { - err = -EBADF; - io_fput(req); - } io_cqring_add_event(ctx, user_data, err, 0); io_put_req(req); return 0; @@ -1098,9 +1045,9 @@ static int io_nop(struct io_kiocb *req, u64 user_data) static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_ring_ctx *ctx = req->ctx; - unsigned flags; - int fd; + if (!req->file) + return -EBADF; /* Prep already done (EAGAIN retry) */ if (req->flags & REQ_F_PREPPED) return 0; @@ -1110,20 +1057,6 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index)) return -EINVAL; - fd = READ_ONCE(sqe->fd); - flags = READ_ONCE(sqe->flags); - - if (flags & IOSQE_FIXED_FILE) { - if (unlikely(!ctx->user_files || fd >= ctx->nr_user_files)) - return -EBADF; - req->rw.ki_filp = ctx->user_files[fd]; - req->flags |= REQ_F_FIXED_FILE; - } else { - req->rw.ki_filp = fget(fd); - if (unlikely(!req->rw.ki_filp)) - return -EBADF; - } - req->flags |= REQ_F_PREPPED; return 0; } @@ -1153,7 +1086,6 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe, end > 0 ? end : LLONG_MAX, fsync_flags & IORING_FSYNC_DATASYNC); - io_fput(req); io_cqring_add_event(req->ctx, sqe->user_data, ret, 0); io_put_req(req); return 0; @@ -1220,7 +1152,6 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe) static void io_poll_complete(struct io_kiocb *req, __poll_t mask) { io_cqring_add_event(req->ctx, req->user_data, mangle_poll(mask), 0); - io_fput(req); io_put_req(req); } @@ -1314,34 +1245,20 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe) struct io_poll_iocb *poll = &req->poll; struct io_ring_ctx *ctx = req->ctx; struct io_poll_table ipt; - unsigned flags; __poll_t mask; u16 events; - int fd; if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL)) return -EINVAL; if (sqe->addr || sqe->ioprio || sqe->off || sqe->len || sqe->buf_index) return -EINVAL; + if (!poll->file) + return -EBADF; INIT_WORK(&req->work, io_poll_complete_work); events = READ_ONCE(sqe->poll_events); poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP; - flags = READ_ONCE(sqe->flags); - fd = READ_ONCE(sqe->fd); - - if (flags & IOSQE_FIXED_FILE) { - if (unlikely(!ctx->user_files || fd >= ctx->nr_user_files)) - return -EBADF; - poll->file = ctx->user_files[fd]; - req->flags |= REQ_F_FIXED_FILE; - } else { - poll->file = fget(fd); - } - if (unlikely(!poll->file)) - return -EBADF; - poll->head = NULL; poll->woken = false; poll->canceled = false; @@ -1380,8 +1297,6 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe) out: if (unlikely(ipt.error)) { - if (!(flags & IOSQE_FIXED_FILE)) - fput(poll->file); /* * Drop one of our refs to this req, __io_submit_sqe() will * drop the other one since we're returning an error. @@ -1621,6 +1536,50 @@ static bool io_add_to_prev_work(struct async_list *list, struct io_kiocb *req) return ret; } +static bool io_op_needs_file(const struct io_uring_sqe *sqe) +{ + int op = READ_ONCE(sqe->opcode); + + switch (op) { + case IORING_OP_NOP: + case IORING_OP_POLL_REMOVE: + return false; + default: + return true; + } +} + +static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s, + struct io_submit_state *state, struct io_kiocb *req) +{ + unsigned flags; + int fd; + + flags = READ_ONCE(s->sqe->flags); + fd = READ_ONCE(s->sqe->fd); + + if (!io_op_needs_file(s->sqe)) { + req->file = NULL; + return 0; + } + + if (flags & IOSQE_FIXED_FILE) { + if (unlikely(!ctx->user_files || + (unsigned) fd >= ctx->nr_user_files)) + return -EBADF; + req->file = ctx->user_files[fd]; + req->flags |= REQ_F_FIXED_FILE; + } else { + if (s->needs_fixed_file) + return -EBADF; + req->file = io_file_get(state, fd); + if (unlikely(!req->file)) + return -EBADF; + } + + return 0; +} + static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, struct io_submit_state *state) { @@ -1635,6 +1594,10 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, if (unlikely(!req)) return -EAGAIN; + ret = io_req_set_file(ctx, s, state, req); + if (unlikely(ret)) + goto out; + ret = __io_submit_sqe(ctx, req, s, true, state); if (ret == -EAGAIN) { struct io_uring_sqe *sqe_copy; @@ -1664,6 +1627,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, } } +out: /* drop submission reference */ io_put_req(req); -- cgit From 9804501fa1228048857910a6bf23e085aade37cc Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 14 Mar 2019 13:47:59 +0800 Subject: appletalk: Fix potential NULL pointer dereference in unregister_snap_client register_snap_client may return NULL, all the callers check it, but only print a warning. This will result in NULL pointer dereference in unregister_snap_client and other places. It has always been used like this since v2.6 Reported-by: Dan Carpenter Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- include/linux/atalk.h | 2 +- net/appletalk/aarp.c | 15 ++++++++++++--- net/appletalk/ddp.c | 20 ++++++++++++-------- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/include/linux/atalk.h b/include/linux/atalk.h index d5cfc0b15b76..f6034ba774be 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -108,7 +108,7 @@ static __inline__ struct elapaarp *aarp_hdr(struct sk_buff *skb) #define AARP_RESOLVE_TIME (10 * HZ) extern struct datalink_proto *ddp_dl, *aarp_dl; -extern void aarp_proto_init(void); +extern int aarp_proto_init(void); /* Inter module exports */ diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c index 49a16cee2aae..420a98bf79b5 100644 --- a/net/appletalk/aarp.c +++ b/net/appletalk/aarp.c @@ -879,15 +879,24 @@ static struct notifier_block aarp_notifier = { static unsigned char aarp_snap_id[] = { 0x00, 0x00, 0x00, 0x80, 0xF3 }; -void __init aarp_proto_init(void) +int __init aarp_proto_init(void) { + int rc; + aarp_dl = register_snap_client(aarp_snap_id, aarp_rcv); - if (!aarp_dl) + if (!aarp_dl) { printk(KERN_CRIT "Unable to register AARP with SNAP.\n"); + return -ENOMEM; + } timer_setup(&aarp_timer, aarp_expire_timeout, 0); aarp_timer.expires = jiffies + sysctl_aarp_expiry_time; add_timer(&aarp_timer); - register_netdevice_notifier(&aarp_notifier); + rc = register_netdevice_notifier(&aarp_notifier); + if (rc) { + del_timer_sync(&aarp_timer); + unregister_snap_client(aarp_dl); + } + return rc; } /* Remove the AARP entries associated with a device. */ diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 795fbc6c06aa..709d2542f729 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1904,9 +1904,6 @@ static unsigned char ddp_snap_id[] = { 0x08, 0x00, 0x07, 0x80, 0x9B }; EXPORT_SYMBOL(atrtr_get_dev); EXPORT_SYMBOL(atalk_find_dev_addr); -static const char atalk_err_snap[] __initconst = - KERN_CRIT "Unable to register DDP with SNAP.\n"; - /* Called by proto.c on kernel start up */ static int __init atalk_init(void) { @@ -1921,17 +1918,22 @@ static int __init atalk_init(void) goto out_proto; ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv); - if (!ddp_dl) - printk(atalk_err_snap); + if (!ddp_dl) { + pr_crit("Unable to register DDP with SNAP.\n"); + goto out_sock; + } dev_add_pack(<alk_packet_type); dev_add_pack(&ppptalk_packet_type); rc = register_netdevice_notifier(&ddp_notifier); if (rc) - goto out_sock; + goto out_snap; + + rc = aarp_proto_init(); + if (rc) + goto out_dev; - aarp_proto_init(); rc = atalk_proc_init(); if (rc) goto out_aarp; @@ -1945,11 +1947,13 @@ out_proc: atalk_proc_exit(); out_aarp: aarp_cleanup_module(); +out_dev: unregister_netdevice_notifier(&ddp_notifier); -out_sock: +out_snap: dev_remove_pack(&ppptalk_packet_type); dev_remove_pack(<alk_packet_type); unregister_snap_client(ddp_dl); +out_sock: sock_unregister(PF_APPLETALK); out_proto: proto_unregister(&ddp_proto); -- cgit From 80acbed9f8fca1db3fbe915540b756f048aa0fd7 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Thu, 14 Mar 2019 21:43:19 +0200 Subject: net: stmmac: don't set own bit too early for jumbo frames Commit 0e80bdc9a72d ("stmmac: first frame prep at the end of xmit routine") overlooked jumbo frames when re-ordering the code, and as a result the own bit was not getting set anymore for the first jumbo frame descriptor. Commit 487e2e22ab79 ("net: stmmac: Set OWN bit for jumbo frames") tried to fix this, but now the bit is getting set too early and the DMA may start while we are still setting up the remaining descriptors. And with the chain mode the own bit remains still unset. Fix by setting the own bit at the end of xmit also with jumbo frames. Fixes: 0e80bdc9a72d ("stmmac: first frame prep at the end of xmit routine") Fixes: 487e2e22ab79 ("net: stmmac: Set OWN bit for jumbo frames") Signed-off-by: Aaro Koskinen Acked-by: Jose Abreu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/ring_mode.c | 4 ++-- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 14 ++++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index d8c5bc412219..bc83ced94e1b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -59,7 +59,7 @@ static int jumbo_frm(void *p, struct sk_buff *skb, int csum) desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB); stmmac_prepare_tx_desc(priv, desc, 1, bmax, csum, - STMMAC_RING_MODE, 1, false, skb->len); + STMMAC_RING_MODE, 0, false, skb->len); tx_q->tx_skbuff[entry] = NULL; entry = STMMAC_GET_ENTRY(entry, DMA_TX_SIZE); @@ -91,7 +91,7 @@ static int jumbo_frm(void *p, struct sk_buff *skb, int csum) tx_q->tx_skbuff_dma[entry].is_jumbo = true; desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB); stmmac_prepare_tx_desc(priv, desc, 1, nopaged_len, csum, - STMMAC_RING_MODE, 1, true, skb->len); + STMMAC_RING_MODE, 0, true, skb->len); } tx_q->cur_tx = entry; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 97c5e1aad88f..6a2e1031a62a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -3216,14 +3216,16 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) stmmac_prepare_tx_desc(priv, first, 1, nopaged_len, csum_insertion, priv->mode, 1, last_segment, skb->len); - - /* The own bit must be the latest setting done when prepare the - * descriptor and then barrier is needed to make sure that - * all is coherent before granting the DMA engine. - */ - wmb(); + } else { + stmmac_set_tx_owner(priv, first); } + /* The own bit must be the latest setting done when prepare the + * descriptor and then barrier is needed to make sure that + * all is coherent before granting the DMA engine. + */ + wmb(); + netdev_tx_sent_queue(netdev_get_tx_queue(dev, queue), skb->len); stmmac_enable_dma_transmission(priv, priv->ioaddr); -- cgit From 58f2ce6f61615dfd8dd3cc01c9e5bb54ed35637e Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Thu, 14 Mar 2019 21:43:20 +0200 Subject: net: stmmac: fix jumbo frame sending with non-linear skbs When sending non-linear skbs with jumbo frames, we set up the non-paged data and mark that as a last segment, although the paged fragments are also prepared. This will stall the TX queue and trigger a watchdog warning (a simple reproducer is to run an iperf client mode TCP test with a large MTU - networking fails instantly). Fix by checking if the skb is non-linear. Signed-off-by: Aaro Koskinen Acked-by: Jose Abreu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/ring_mode.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index bc83ced94e1b..f936166d8910 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -79,7 +79,8 @@ static int jumbo_frm(void *p, struct sk_buff *skb, int csum) desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB); stmmac_prepare_tx_desc(priv, desc, 0, len, csum, - STMMAC_RING_MODE, 1, true, skb->len); + STMMAC_RING_MODE, 1, !skb_is_nonlinear(skb), + skb->len); } else { des2 = dma_map_single(priv->device, skb->data, nopaged_len, DMA_TO_DEVICE); @@ -91,7 +92,8 @@ static int jumbo_frm(void *p, struct sk_buff *skb, int csum) tx_q->tx_skbuff_dma[entry].is_jumbo = true; desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB); stmmac_prepare_tx_desc(priv, desc, 1, nopaged_len, csum, - STMMAC_RING_MODE, 0, true, skb->len); + STMMAC_RING_MODE, 0, !skb_is_nonlinear(skb), + skb->len); } tx_q->cur_tx = entry; -- cgit From 5bf7295fe34a5251b1d241b9736af4697b590670 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Thu, 14 Mar 2019 15:31:40 -0500 Subject: qlcnic: Avoid potential NULL pointer dereference netdev_alloc_skb can fail and return a NULL pointer which is dereferenced without a check. The patch avoids such a scenario. Signed-off-by: Aditya Pakki Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c index 3b0adda7cc9c..a4cd6f2cfb86 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c @@ -1048,6 +1048,8 @@ int qlcnic_do_lb_test(struct qlcnic_adapter *adapter, u8 mode) for (i = 0; i < QLCNIC_NUM_ILB_PKT; i++) { skb = netdev_alloc_skb(adapter->netdev, QLCNIC_ILB_PKT_SIZE); + if (!skb) + break; qlcnic_create_loopback_buff(skb->data, adapter->mac_addr); skb_put(skb, QLCNIC_ILB_PKT_SIZE); adapter->ahw->diag_cnt = 0; -- cgit From eab2fc822af38f31fd5f4e731b5d10b94904d919 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Thu, 14 Mar 2019 23:08:22 +0100 Subject: sch_cake: Interpret fwmark parameter as a bitmask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We initially interpreted the fwmark parameter as a flag that simply turned on the feature, using the whole skb->mark field as the index into the CAKE tin_order array. However, it is quite common for different applications to use different parts of the mask field for their own purposes, each using a different mask. Support this use of subsets of the mark by interpreting the TCA_CAKE_FWMARK parameter as a bitmask to apply to the fwmark field when reading it. The result will be right-shifted by the number of unset lower bits of the mask before looking up the tin. In the original commit message we also failed to credit Felix Resch with originally suggesting the fwmark feature back in 2017; so the Suggested-By in this commit covers the whole fwmark feature. Fixes: 0b5c7efdfc6e ("sch_cake: Permit use of connmarks as tin classifiers") Suggested-by: Felix Resch Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: David S. Miller --- net/sched/sch_cake.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 1d2a12132abc..acc9b9da985f 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -211,6 +211,9 @@ struct cake_sched_data { u8 ack_filter; u8 atm_mode; + u32 fwmark_mask; + u16 fwmark_shft; + /* time_next = time_this + ((len * rate_ns) >> rate_shft) */ u16 rate_shft; ktime_t time_next_packet; @@ -258,8 +261,7 @@ enum { CAKE_FLAG_AUTORATE_INGRESS = BIT(1), CAKE_FLAG_INGRESS = BIT(2), CAKE_FLAG_WASH = BIT(3), - CAKE_FLAG_SPLIT_GSO = BIT(4), - CAKE_FLAG_FWMARK = BIT(5) + CAKE_FLAG_SPLIT_GSO = BIT(4) }; /* COBALT operates the Codel and BLUE algorithms in parallel, in order to @@ -1543,7 +1545,7 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch, struct sk_buff *skb) { struct cake_sched_data *q = qdisc_priv(sch); - u32 tin; + u32 tin, mark; u8 dscp; /* Tin selection: Default to diffserv-based selection, allow overriding @@ -1551,14 +1553,13 @@ static struct cake_tin_data *cake_select_tin(struct Qdisc *sch, */ dscp = cake_handle_diffserv(skb, q->rate_flags & CAKE_FLAG_WASH); + mark = (skb->mark & q->fwmark_mask) >> q->fwmark_shft; if (q->tin_mode == CAKE_DIFFSERV_BESTEFFORT) tin = 0; - else if (q->rate_flags & CAKE_FLAG_FWMARK && /* use fw mark */ - skb->mark && - skb->mark <= q->tin_cnt) - tin = q->tin_order[skb->mark - 1]; + else if (mark && mark <= q->tin_cnt) + tin = q->tin_order[mark - 1]; else if (TC_H_MAJ(skb->priority) == sch->handle && TC_H_MIN(skb->priority) > 0 && @@ -2172,6 +2173,7 @@ static const struct nla_policy cake_policy[TCA_CAKE_MAX + 1] = { [TCA_CAKE_MPU] = { .type = NLA_U32 }, [TCA_CAKE_INGRESS] = { .type = NLA_U32 }, [TCA_CAKE_ACK_FILTER] = { .type = NLA_U32 }, + [TCA_CAKE_FWMARK] = { .type = NLA_U32 }, }; static void cake_set_rate(struct cake_tin_data *b, u64 rate, u32 mtu, @@ -2619,10 +2621,8 @@ static int cake_change(struct Qdisc *sch, struct nlattr *opt, } if (tb[TCA_CAKE_FWMARK]) { - if (!!nla_get_u32(tb[TCA_CAKE_FWMARK])) - q->rate_flags |= CAKE_FLAG_FWMARK; - else - q->rate_flags &= ~CAKE_FLAG_FWMARK; + q->fwmark_mask = nla_get_u32(tb[TCA_CAKE_FWMARK]); + q->fwmark_shft = q->fwmark_mask ? __ffs(q->fwmark_mask) : 0; } if (q->tins) { @@ -2784,8 +2784,7 @@ static int cake_dump(struct Qdisc *sch, struct sk_buff *skb) !!(q->rate_flags & CAKE_FLAG_SPLIT_GSO))) goto nla_put_failure; - if (nla_put_u32(skb, TCA_CAKE_FWMARK, - !!(q->rate_flags & CAKE_FLAG_FWMARK))) + if (nla_put_u32(skb, TCA_CAKE_FWMARK, q->fwmark_mask)) goto nla_put_failure; return nla_nest_end(skb, opts); -- cgit From 3d4c3cec0909dc9c40db82a74aae0cdf3f5ad138 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 14 Mar 2019 23:47:13 +0000 Subject: drivers: net: atp: fix various indentation issues There is a statement that is indented incorrectly; replace spaces with a tab. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/atp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c index cfb67b746595..58e0ca9093d3 100644 --- a/drivers/net/ethernet/realtek/atp.c +++ b/drivers/net/ethernet/realtek/atp.c @@ -482,7 +482,7 @@ static void hardware_init(struct net_device *dev) write_reg_high(ioaddr, IMR, ISRh_RxErr); lp->tx_unit_busy = 0; - lp->pac_cnt_in_tx_buf = 0; + lp->pac_cnt_in_tx_buf = 0; lp->saved_tx_size = 0; } -- cgit From 68cfe9a286f3ee2371de00ab666b4949ff285196 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 14 Mar 2019 23:56:35 +0000 Subject: net: sis900: fix indentation issues, remove some spaces There are several statements that contain extra spacing in the indentation; clean this up by removing spaces. Also add { } braces on if statement to keep to kernel coding style. Signed-off-by: Colin Ian King Signed-off-by: David S. Miller --- drivers/net/ethernet/sis/sis900.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 6073387511f8..67f9bb6e941b 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -730,10 +730,10 @@ static u16 sis900_default_phy(struct net_device * net_dev) status = mdio_read(net_dev, phy->phy_addr, MII_STATUS); /* Link ON & Not select default PHY & not ghost PHY */ - if ((status & MII_STAT_LINK) && !default_phy && - (phy->phy_types != UNKNOWN)) - default_phy = phy; - else { + if ((status & MII_STAT_LINK) && !default_phy && + (phy->phy_types != UNKNOWN)) { + default_phy = phy; + } else { status = mdio_read(net_dev, phy->phy_addr, MII_CONTROL); mdio_write(net_dev, phy->phy_addr, MII_CONTROL, status | MII_CNTL_AUTO | MII_CNTL_ISOLATE); @@ -741,7 +741,7 @@ static u16 sis900_default_phy(struct net_device * net_dev) phy_home = phy; else if(phy->phy_types == LAN) phy_lan = phy; - } + } } if (!default_phy && phy_home) -- cgit From 228cd2dba27cee9956c1af97e6445be056881e41 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 14 Mar 2019 23:12:06 -0500 Subject: net: strparser: fix a missing check for create_singlethread_workqueue In case create_singlethread_workqueue fails, the check returns an error to callers to avoid potential NULL pointer dereferences. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- net/strparser/strparser.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index da1a676860ca..860dcfb95ee4 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -550,6 +550,8 @@ EXPORT_SYMBOL_GPL(strp_check_rcv); static int __init strp_mod_init(void) { strp_wq = create_singlethread_workqueue("kstrp"); + if (unlikely(!strp_wq)) + return -ENOMEM; return 0; } -- cgit From 8c838788775a593527803786d376393b7c28f589 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 12 Mar 2019 15:48:16 -0600 Subject: io_uring: fix poll races This is a straight port of Al's fix for the aio poll implementation, since the io_uring version is heavily based on that. The below description is almost straight from that patch, just modified to fit the io_uring situation. io_poll() has to cope with several unpleasant problems: * requests that might stay around indefinitely need to be made visible for io_cancel(2); that must not be done to a request already completed, though. * in cases when ->poll() has placed us on a waitqueue, wakeup might have happened (and request completed) before ->poll() returns. * worse, in some early wakeup cases request might end up re-added into the queue later - we can't treat "woken up and currently not in the queue" as "it's not going to stick around indefinitely" * ... moreover, ->poll() might have decided not to put it on any queues to start with, and that needs to be distinguished from the previous case * ->poll() might have tried to put us on more than one queue. Only the first will succeed for io poll, so we might end up missing wakeups. OTOH, we might very well notice that only after the wakeup hits and request gets completed (all before ->poll() gets around to the second poll_wait()). In that case it's too late to decide that we have an error. req->woken was an attempt to deal with that. Unfortunately, it was broken. What we need to keep track of is not that wakeup has happened - the thing might come back after that. It's that async reference is already gone and won't come back, so we can't (and needn't) put the request on the list of cancellables. The easiest case is "request hadn't been put on any waitqueues"; we can tell by seeing NULL apt.head, and in that case there won't be anything async. We should either complete the request ourselves (if vfs_poll() reports anything of interest) or return an error. In all other cases we get exclusion with wakeups by grabbing the queue lock. If request is currently on queue and we have something interesting from vfs_poll(), we can steal it and complete the request ourselves. If it's on queue and vfs_poll() has not reported anything interesting, we either put it on the cancellable list, or, if we know that it hadn't been put on all queues ->poll() wanted it on, we steal it and return an error. If it's _not_ on queue, it's either been already dealt with (in which case we do nothing), or there's io_poll_complete_work() about to be executed. In that case we either put it on the cancellable list, or, if we know it hadn't been put on all queues ->poll() wanted it on, simulate what cancel would've done. Fixes: 221c5eb23382 ("io_uring: add support for IORING_OP_POLL") Signed-off-by: Jens Axboe --- fs/io_uring.c | 111 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 56 insertions(+), 55 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index c08fa62e1978..12bb238aed6b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -197,7 +197,7 @@ struct io_poll_iocb { struct file *file; struct wait_queue_head *head; __poll_t events; - bool woken; + bool done; bool canceled; struct wait_queue_entry wait; }; @@ -367,20 +367,25 @@ static void io_cqring_fill_event(struct io_ring_ctx *ctx, u64 ki_user_data, } } -static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 ki_user_data, +static void io_cqring_ev_posted(struct io_ring_ctx *ctx) +{ + if (waitqueue_active(&ctx->wait)) + wake_up(&ctx->wait); + if (waitqueue_active(&ctx->sqo_wait)) + wake_up(&ctx->sqo_wait); +} + +static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 user_data, long res, unsigned ev_flags) { unsigned long flags; spin_lock_irqsave(&ctx->completion_lock, flags); - io_cqring_fill_event(ctx, ki_user_data, res, ev_flags); + io_cqring_fill_event(ctx, user_data, res, ev_flags); io_commit_cqring(ctx); spin_unlock_irqrestore(&ctx->completion_lock, flags); - if (waitqueue_active(&ctx->wait)) - wake_up(&ctx->wait); - if (waitqueue_active(&ctx->sqo_wait)) - wake_up(&ctx->sqo_wait); + io_cqring_ev_posted(ctx); } static void io_ring_drop_ctx_refs(struct io_ring_ctx *ctx, unsigned refs) @@ -1149,10 +1154,12 @@ static int io_poll_remove(struct io_kiocb *req, const struct io_uring_sqe *sqe) return 0; } -static void io_poll_complete(struct io_kiocb *req, __poll_t mask) +static void io_poll_complete(struct io_ring_ctx *ctx, struct io_kiocb *req, + __poll_t mask) { - io_cqring_add_event(req->ctx, req->user_data, mangle_poll(mask), 0); - io_put_req(req); + req->poll.done = true; + io_cqring_fill_event(ctx, req->user_data, mangle_poll(mask), 0); + io_commit_cqring(ctx); } static void io_poll_complete_work(struct work_struct *work) @@ -1180,9 +1187,11 @@ static void io_poll_complete_work(struct work_struct *work) return; } list_del_init(&req->list); + io_poll_complete(ctx, req, mask); spin_unlock_irq(&ctx->completion_lock); - io_poll_complete(req, mask); + io_cqring_ev_posted(ctx); + io_put_req(req); } static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, @@ -1193,29 +1202,25 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync, struct io_kiocb *req = container_of(poll, struct io_kiocb, poll); struct io_ring_ctx *ctx = req->ctx; __poll_t mask = key_to_poll(key); - - poll->woken = true; + unsigned long flags; /* for instances that support it check for an event match first: */ - if (mask) { - unsigned long flags; + if (mask && !(mask & poll->events)) + return 0; - if (!(mask & poll->events)) - return 0; + list_del_init(&poll->wait.entry); - /* try to complete the iocb inline if we can: */ - if (spin_trylock_irqsave(&ctx->completion_lock, flags)) { - list_del(&req->list); - spin_unlock_irqrestore(&ctx->completion_lock, flags); + if (mask && spin_trylock_irqsave(&ctx->completion_lock, flags)) { + list_del(&req->list); + io_poll_complete(ctx, req, mask); + spin_unlock_irqrestore(&ctx->completion_lock, flags); - list_del_init(&poll->wait.entry); - io_poll_complete(req, mask); - return 1; - } + io_cqring_ev_posted(ctx); + io_put_req(req); + } else { + queue_work(ctx->sqo_wq, &req->work); } - list_del_init(&poll->wait.entry); - queue_work(ctx->sqo_wq, &req->work); return 1; } @@ -1245,6 +1250,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe) struct io_poll_iocb *poll = &req->poll; struct io_ring_ctx *ctx = req->ctx; struct io_poll_table ipt; + bool cancel = false; __poll_t mask; u16 events; @@ -1260,7 +1266,7 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe) poll->events = demangle_poll(events) | EPOLLERR | EPOLLHUP; poll->head = NULL; - poll->woken = false; + poll->done = false; poll->canceled = false; ipt.pt._qproc = io_poll_queue_proc; @@ -1273,41 +1279,36 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe) init_waitqueue_func_entry(&poll->wait, io_poll_wake); mask = vfs_poll(poll->file, &ipt.pt) & poll->events; - if (unlikely(!poll->head)) { - /* we did not manage to set up a waitqueue, done */ - goto out; - } spin_lock_irq(&ctx->completion_lock); - spin_lock(&poll->head->lock); - if (poll->woken) { - /* wake_up context handles the rest */ - mask = 0; + if (likely(poll->head)) { + spin_lock(&poll->head->lock); + if (unlikely(list_empty(&poll->wait.entry))) { + if (ipt.error) + cancel = true; + ipt.error = 0; + mask = 0; + } + if (mask || ipt.error) + list_del_init(&poll->wait.entry); + else if (cancel) + WRITE_ONCE(poll->canceled, true); + else if (!poll->done) /* actually waiting for an event */ + list_add_tail(&req->list, &ctx->cancel_list); + spin_unlock(&poll->head->lock); + } + if (mask) { /* no async, we'd stolen it */ + req->error = mangle_poll(mask); ipt.error = 0; - } else if (mask || ipt.error) { - /* if we get an error or a mask we are done */ - WARN_ON_ONCE(list_empty(&poll->wait.entry)); - list_del_init(&poll->wait.entry); - } else { - /* actually waiting for an event */ - list_add_tail(&req->list, &ctx->cancel_list); + io_poll_complete(ctx, req, mask); } - spin_unlock(&poll->head->lock); spin_unlock_irq(&ctx->completion_lock); -out: - if (unlikely(ipt.error)) { - /* - * Drop one of our refs to this req, __io_submit_sqe() will - * drop the other one since we're returning an error. - */ + if (mask) { + io_cqring_ev_posted(ctx); io_put_req(req); - return ipt.error; } - - if (mask) - io_poll_complete(req, mask); - return 0; + return ipt.error; } static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, -- cgit From 8a3c245c031944f2176118270e7bc5d4fd4a1075 Mon Sep 17 00:00:00 2001 From: Pedro Tammela Date: Thu, 14 Mar 2019 10:45:23 -0300 Subject: net: add documentation to socket.c Adds missing sphinx documentation to the socket.c's functions. Also fixes some whitespaces. I also changed the style of older documentation as an effort to have an uniform documentation style. Signed-off-by: Pedro Tammela Signed-off-by: David S. Miller --- include/linux/net.h | 6 ++ include/linux/socket.h | 12 +-- net/socket.c | 277 +++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 271 insertions(+), 24 deletions(-) diff --git a/include/linux/net.h b/include/linux/net.h index 651fca72286c..c606c72311d0 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -83,6 +83,12 @@ enum sock_type { #endif /* ARCH_HAS_SOCKET_TYPES */ +/** + * enum sock_shutdown_cmd - Shutdown types + * @SHUT_RD: shutdown receptions + * @SHUT_WR: shutdown transmissions + * @SHUT_RDWR: shutdown receptions/transmissions + */ enum sock_shutdown_cmd { SHUT_RD, SHUT_WR, diff --git a/include/linux/socket.h b/include/linux/socket.h index 6016daeecee4..b57cd8bf96e2 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -26,7 +26,7 @@ typedef __kernel_sa_family_t sa_family_t; /* * 1003.1g requires sa_family_t and that sa_data is char. */ - + struct sockaddr { sa_family_t sa_family; /* address family, AF_xxx */ char sa_data[14]; /* 14 bytes of protocol address */ @@ -44,7 +44,7 @@ struct linger { * system, not 4.3. Thus msg_accrights(len) are now missing. They * belong in an obscure libc emulation or the bin. */ - + struct msghdr { void *msg_name; /* ptr to socket address structure */ int msg_namelen; /* size of socket address structure */ @@ -54,7 +54,7 @@ struct msghdr { unsigned int msg_flags; /* flags on received message */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */ }; - + struct user_msghdr { void __user *msg_name; /* ptr to socket address structure */ int msg_namelen; /* size of socket address structure */ @@ -122,7 +122,7 @@ struct cmsghdr { * inside range, given by msg->msg_controllen before using * ancillary object DATA. --ANK (980731) */ - + static inline struct cmsghdr * __cmsg_nxthdr(void *__ctl, __kernel_size_t __size, struct cmsghdr *__cmsg) { @@ -264,10 +264,10 @@ struct ucred { /* Maximum queue length specifiable by listen. */ #define SOMAXCONN 128 -/* Flags we can use with send/ and recv. +/* Flags we can use with send/ and recv. Added those for 1003.1g not all are supported yet */ - + #define MSG_OOB 1 #define MSG_PEEK 2 #define MSG_DONTROUTE 4 diff --git a/net/socket.c b/net/socket.c index 3c176a12fe48..8255f5bda0aa 100644 --- a/net/socket.c +++ b/net/socket.c @@ -384,6 +384,18 @@ static struct file_system_type sock_fs_type = { * but we take care of internal coherence yet. */ +/** + * sock_alloc_file - Bind a &socket to a &file + * @sock: socket + * @flags: file status flags + * @dname: protocol name + * + * Returns the &file bound with @sock, implicitly storing it + * in sock->file. If dname is %NULL, sets to "". + * On failure the return is a ERR pointer (see linux/err.h). + * This function uses GFP_KERNEL internally. + */ + struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) { struct file *file; @@ -424,6 +436,14 @@ static int sock_map_fd(struct socket *sock, int flags) return PTR_ERR(newfile); } +/** + * sock_from_file - Return the &socket bounded to @file. + * @file: file + * @err: pointer to an error code return + * + * On failure returns %NULL and assigns -ENOTSOCK to @err. + */ + struct socket *sock_from_file(struct file *file, int *err) { if (file->f_op == &socket_file_ops) @@ -532,11 +552,11 @@ static const struct inode_operations sockfs_inode_ops = { }; /** - * sock_alloc - allocate a socket + * sock_alloc - allocate a socket * * Allocate a new inode and socket object. The two are bound together * and initialised. The socket is then returned. If we are out of inodes - * NULL is returned. + * NULL is returned. This functions uses GFP_KERNEL internally. */ struct socket *sock_alloc(void) @@ -561,7 +581,7 @@ struct socket *sock_alloc(void) EXPORT_SYMBOL(sock_alloc); /** - * sock_release - close a socket + * sock_release - close a socket * @sock: socket to close * * The socket is released from the protocol stack if it has a release @@ -617,6 +637,15 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags) } EXPORT_SYMBOL(__sock_tx_timestamp); +/** + * sock_sendmsg - send a message through @sock + * @sock: socket + * @msg: message to send + * + * Sends @msg through @sock, passing through LSM. + * Returns the number of bytes sent, or an error code. + */ + static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg) { int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg)); @@ -633,6 +662,18 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg) } EXPORT_SYMBOL(sock_sendmsg); +/** + * kernel_sendmsg - send a message through @sock (kernel-space) + * @sock: socket + * @msg: message header + * @vec: kernel vec + * @num: vec array length + * @size: total message data size + * + * Builds the message data with @vec and sends it through @sock. + * Returns the number of bytes sent, or an error code. + */ + int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) { @@ -641,6 +682,19 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, } EXPORT_SYMBOL(kernel_sendmsg); +/** + * kernel_sendmsg_locked - send a message through @sock (kernel-space) + * @sk: sock + * @msg: message header + * @vec: output s/g array + * @num: output s/g array length + * @size: total message data size + * + * Builds the message data with @vec and sends it through @sock. + * Returns the number of bytes sent, or an error code. + * Caller must hold @sk. + */ + int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) { @@ -811,6 +865,16 @@ void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, } EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); +/** + * sock_recvmsg - receive a message from @sock + * @sock: socket + * @msg: message to receive + * @flags: message flags + * + * Receives @msg from @sock, passing through LSM. Returns the total number + * of bytes received, or an error. + */ + static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, int flags) { @@ -826,20 +890,21 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags) EXPORT_SYMBOL(sock_recvmsg); /** - * kernel_recvmsg - Receive a message from a socket (kernel space) - * @sock: The socket to receive the message from - * @msg: Received message - * @vec: Input s/g array for message data - * @num: Size of input s/g array - * @size: Number of bytes to read - * @flags: Message flags (MSG_DONTWAIT, etc...) + * kernel_recvmsg - Receive a message from a socket (kernel space) + * @sock: The socket to receive the message from + * @msg: Received message + * @vec: Input s/g array for message data + * @num: Size of input s/g array + * @size: Number of bytes to read + * @flags: Message flags (MSG_DONTWAIT, etc...) * - * On return the msg structure contains the scatter/gather array passed in the - * vec argument. The array is modified so that it consists of the unfilled - * portion of the original array. + * On return the msg structure contains the scatter/gather array passed in the + * vec argument. The array is modified so that it consists of the unfilled + * portion of the original array. * - * The returned value is the total number of bytes received, or an error. + * The returned value is the total number of bytes received, or an error. */ + int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size, int flags) { @@ -1005,6 +1070,13 @@ static long sock_do_ioctl(struct net *net, struct socket *sock, * what to do with it - that's up to the protocol still. */ +/** + * get_net_ns - increment the refcount of the network namespace + * @ns: common namespace (net) + * + * Returns the net's common namespace. + */ + struct ns_common *get_net_ns(struct ns_common *ns) { return &get_net(container_of(ns, struct net, ns))->ns; @@ -1099,6 +1171,19 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) return err; } +/** + * sock_create_lite - creates a socket + * @family: protocol family (AF_INET, ...) + * @type: communication type (SOCK_STREAM, ...) + * @protocol: protocol (0, ...) + * @res: new socket + * + * Creates a new socket and assigns it to @res, passing through LSM. + * The new socket initialization is not complete, see kernel_accept(). + * Returns 0 or an error. On failure @res is set to %NULL. + * This function internally uses GFP_KERNEL. + */ + int sock_create_lite(int family, int type, int protocol, struct socket **res) { int err; @@ -1224,6 +1309,21 @@ call_kill: } EXPORT_SYMBOL(sock_wake_async); +/** + * __sock_create - creates a socket + * @net: net namespace + * @family: protocol family (AF_INET, ...) + * @type: communication type (SOCK_STREAM, ...) + * @protocol: protocol (0, ...) + * @res: new socket + * @kern: boolean for kernel space sockets + * + * Creates a new socket and assigns it to @res, passing through LSM. + * Returns 0 or an error. On failure @res is set to %NULL. @kern must + * be set to true if the socket resides in kernel space. + * This function internally uses GFP_KERNEL. + */ + int __sock_create(struct net *net, int family, int type, int protocol, struct socket **res, int kern) { @@ -1333,12 +1433,35 @@ out_release: } EXPORT_SYMBOL(__sock_create); +/** + * sock_create - creates a socket + * @family: protocol family (AF_INET, ...) + * @type: communication type (SOCK_STREAM, ...) + * @protocol: protocol (0, ...) + * @res: new socket + * + * A wrapper around __sock_create(). + * Returns 0 or an error. This function internally uses GFP_KERNEL. + */ + int sock_create(int family, int type, int protocol, struct socket **res) { return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); } EXPORT_SYMBOL(sock_create); +/** + * sock_create_kern - creates a socket (kernel space) + * @net: net namespace + * @family: protocol family (AF_INET, ...) + * @type: communication type (SOCK_STREAM, ...) + * @protocol: protocol (0, ...) + * @res: new socket + * + * A wrapper around __sock_create(). + * Returns 0 or an error. This function internally uses GFP_KERNEL. + */ + int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res) { return __sock_create(net, family, type, protocol, res, 1); @@ -3322,18 +3445,46 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd, } #endif +/** + * kernel_bind - bind an address to a socket (kernel space) + * @sock: socket + * @addr: address + * @addrlen: length of address + * + * Returns 0 or an error. + */ + int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) { return sock->ops->bind(sock, addr, addrlen); } EXPORT_SYMBOL(kernel_bind); +/** + * kernel_listen - move socket to listening state (kernel space) + * @sock: socket + * @backlog: pending connections queue size + * + * Returns 0 or an error. + */ + int kernel_listen(struct socket *sock, int backlog) { return sock->ops->listen(sock, backlog); } EXPORT_SYMBOL(kernel_listen); +/** + * kernel_accept - accept a connection (kernel space) + * @sock: listening socket + * @newsock: new connected socket + * @flags: flags + * + * @flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0. + * If it fails, @newsock is guaranteed to be %NULL. + * Returns 0 or an error. + */ + int kernel_accept(struct socket *sock, struct socket **newsock, int flags) { struct sock *sk = sock->sk; @@ -3359,6 +3510,19 @@ done: } EXPORT_SYMBOL(kernel_accept); +/** + * kernel_connect - connect a socket (kernel space) + * @sock: socket + * @addr: address + * @addrlen: address length + * @flags: flags (O_NONBLOCK, ...) + * + * For datagram sockets, @addr is the addres to which datagrams are sent + * by default, and the only address from which datagrams are received. + * For stream sockets, attempts to connect to @addr. + * Returns 0 or an error code. + */ + int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, int flags) { @@ -3366,18 +3530,48 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, } EXPORT_SYMBOL(kernel_connect); +/** + * kernel_getsockname - get the address which the socket is bound (kernel space) + * @sock: socket + * @addr: address holder + * + * Fills the @addr pointer with the address which the socket is bound. + * Returns 0 or an error code. + */ + int kernel_getsockname(struct socket *sock, struct sockaddr *addr) { return sock->ops->getname(sock, addr, 0); } EXPORT_SYMBOL(kernel_getsockname); +/** + * kernel_peername - get the address which the socket is connected (kernel space) + * @sock: socket + * @addr: address holder + * + * Fills the @addr pointer with the address which the socket is connected. + * Returns 0 or an error code. + */ + int kernel_getpeername(struct socket *sock, struct sockaddr *addr) { return sock->ops->getname(sock, addr, 1); } EXPORT_SYMBOL(kernel_getpeername); +/** + * kernel_getsockopt - get a socket option (kernel space) + * @sock: socket + * @level: API level (SOL_SOCKET, ...) + * @optname: option tag + * @optval: option value + * @optlen: option length + * + * Assigns the option length to @optlen. + * Returns 0 or an error. + */ + int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen) { @@ -3400,6 +3594,17 @@ int kernel_getsockopt(struct socket *sock, int level, int optname, } EXPORT_SYMBOL(kernel_getsockopt); +/** + * kernel_setsockopt - set a socket option (kernel space) + * @sock: socket + * @level: API level (SOL_SOCKET, ...) + * @optname: option tag + * @optval: option value + * @optlen: option length + * + * Returns 0 or an error. + */ + int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval, unsigned int optlen) { @@ -3420,6 +3625,17 @@ int kernel_setsockopt(struct socket *sock, int level, int optname, } EXPORT_SYMBOL(kernel_setsockopt); +/** + * kernel_sendpage - send a &page through a socket (kernel space) + * @sock: socket + * @page: page + * @offset: page offset + * @size: total size in bytes + * @flags: flags (MSG_DONTWAIT, ...) + * + * Returns the total amount sent in bytes or an error. + */ + int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { @@ -3430,6 +3646,18 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset, } EXPORT_SYMBOL(kernel_sendpage); +/** + * kernel_sendpage_locked - send a &page through the locked sock (kernel space) + * @sk: sock + * @page: page + * @offset: page offset + * @size: total size in bytes + * @flags: flags (MSG_DONTWAIT, ...) + * + * Returns the total amount sent in bytes or an error. + * Caller must hold @sk. + */ + int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, size_t size, int flags) { @@ -3443,17 +3671,30 @@ int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, } EXPORT_SYMBOL(kernel_sendpage_locked); +/** + * kernel_shutdown - shut down part of a full-duplex connection (kernel space) + * @sock: socket + * @how: connection part + * + * Returns 0 or an error. + */ + int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) { return sock->ops->shutdown(sock, how); } EXPORT_SYMBOL(kernel_sock_shutdown); -/* This routine returns the IP overhead imposed by a socket i.e. - * the length of the underlying IP header, depending on whether - * this is an IPv4 or IPv6 socket and the length from IP options turned - * on at the socket. Assumes that the caller has a lock on the socket. +/** + * kernel_sock_ip_overhead - returns the IP overhead imposed by a socket + * @sk: socket + * + * This routine returns the IP overhead imposed by a socket i.e. + * the length of the underlying IP header, depending on whether + * this is an IPv4 or IPv6 socket and the length from IP options turned + * on at the socket. Assumes that the caller has a lock on the socket. */ + u32 kernel_sock_ip_overhead(struct sock *sk) { struct inet_sock *inet; -- cgit From daa5c4d0167a308306525fd5ab9a5e18e21f4f74 Mon Sep 17 00:00:00 2001 From: Jerome Brunet Date: Thu, 14 Mar 2019 14:49:45 +0100 Subject: net: phy: meson-gxl: fix interrupt support If an interrupt is already pending when the interrupt is enabled on the GXL phy, no IRQ will ever be triggered. The fix is simply to make sure pending IRQs are cleared before setting up the irq mask. Fixes: cf127ff20af1 ("net: phy: meson-gxl: add interrupt support") Signed-off-by: Jerome Brunet Signed-off-by: David S. Miller --- drivers/net/phy/meson-gxl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/phy/meson-gxl.c b/drivers/net/phy/meson-gxl.c index a238388eb1a5..0eec2913c289 100644 --- a/drivers/net/phy/meson-gxl.c +++ b/drivers/net/phy/meson-gxl.c @@ -201,6 +201,7 @@ static int meson_gxl_ack_interrupt(struct phy_device *phydev) static int meson_gxl_config_intr(struct phy_device *phydev) { u16 val; + int ret; if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { val = INTSRC_ANEG_PR @@ -213,6 +214,11 @@ static int meson_gxl_config_intr(struct phy_device *phydev) val = 0; } + /* Ack any pending IRQ */ + ret = meson_gxl_ack_interrupt(phydev); + if (ret) + return ret; + return phy_write(phydev, INTSRC_MASK, val); } -- cgit From 4477138fa0ae4e1b699786ef0600863ea6e6c61c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Mar 2019 20:19:47 -0700 Subject: tun: properly test for IFF_UP Same reasons than the ones explained in commit 4179cb5a4c92 ("vxlan: test dev->flags & IFF_UP before calling netif_rx()") netif_rx_ni() or napi_gro_frags() must be called under a strict contract. At device dismantle phase, core networking clears IFF_UP and flush_all_backlogs() is called after rcu grace period to make sure no incoming packet might be in a cpu backlog and still referencing the device. A similar protocol is used for gro layer. Most drivers call netif_rx() from their interrupt handler, and since the interrupts are disabled at device dismantle, netif_rx() does not have to check dev->flags & IFF_UP Virtual drivers do not have this guarantee, and must therefore make the check themselves. Fixes: 1bd4978a88ac ("tun: honor IFF_UP in tun_get_user()") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- drivers/net/tun.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 1d68921723dc..0d343359f647 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1763,9 +1763,6 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, int skb_xdp = 1; bool frags = tun_napi_frags_enabled(tfile); - if (!(tun->dev->flags & IFF_UP)) - return -EIO; - if (!(tun->flags & IFF_NO_PI)) { if (len < sizeof(pi)) return -EINVAL; @@ -1867,6 +1864,8 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, err = skb_copy_datagram_from_iter(skb, 0, from, len); if (err) { + err = -EFAULT; +drop: this_cpu_inc(tun->pcpu_stats->rx_dropped); kfree_skb(skb); if (frags) { @@ -1874,7 +1873,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, mutex_unlock(&tfile->napi_mutex); } - return -EFAULT; + return err; } } @@ -1958,6 +1957,12 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, !tfile->detached) rxhash = __skb_get_hash_symmetric(skb); + rcu_read_lock(); + if (unlikely(!(tun->dev->flags & IFF_UP))) { + err = -EIO; + goto drop; + } + if (frags) { /* Exercise flow dissector code path. */ u32 headlen = eth_get_headlen(skb->data, skb_headlen(skb)); @@ -1965,6 +1970,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (unlikely(headlen > skb_headlen(skb))) { this_cpu_inc(tun->pcpu_stats->rx_dropped); napi_free_frags(&tfile->napi); + rcu_read_unlock(); mutex_unlock(&tfile->napi_mutex); WARN_ON(1); return -ENOMEM; @@ -1992,6 +1998,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, } else { netif_rx_ni(skb); } + rcu_read_unlock(); stats = get_cpu_ptr(tun->pcpu_stats); u64_stats_update_begin(&stats->syncp); -- cgit From 044175a06706d516aa42874bb44dbbfc3c4d20eb Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Wed, 13 Mar 2019 15:15:49 +0100 Subject: xsk: fix umem memory leak on cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the umem is cleaned up, the task that created it might already be gone. If the task was gone, the xdp_umem_release function did not free the pages member of struct xdp_umem. It turned out that the task lookup was not needed at all; The code was a left-over when we moved from task accounting to user accounting [1]. This patch fixes the memory leak by removing the task lookup logic completely. [1] https://lore.kernel.org/netdev/20180131135356.19134-3-bjorn.topel@gmail.com/ Link: https://lore.kernel.org/netdev/c1cb2ca8-6a14-3980-8672-f3de0bb38dfd@suse.cz/ Fixes: c0c77d8fb787 ("xsk: add user memory registration support sockopt") Reported-by: Jiri Slaby Signed-off-by: Björn Töpel Signed-off-by: Daniel Borkmann --- include/net/xdp_sock.h | 1 - net/xdp/xdp_umem.c | 19 +------------------ 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 61cf7dbb6782..d074b6d60f8a 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -36,7 +36,6 @@ struct xdp_umem { u32 headroom; u32 chunk_size_nohr; struct user_struct *user; - struct pid *pid; unsigned long address; refcount_t users; struct work_struct work; diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 77520eacee8f..989e52386c35 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -193,9 +193,6 @@ static void xdp_umem_unaccount_pages(struct xdp_umem *umem) static void xdp_umem_release(struct xdp_umem *umem) { - struct task_struct *task; - struct mm_struct *mm; - xdp_umem_clear_dev(umem); ida_simple_remove(&umem_ida, umem->id); @@ -214,21 +211,10 @@ static void xdp_umem_release(struct xdp_umem *umem) xdp_umem_unpin_pages(umem); - task = get_pid_task(umem->pid, PIDTYPE_PID); - put_pid(umem->pid); - if (!task) - goto out; - mm = get_task_mm(task); - put_task_struct(task); - if (!mm) - goto out; - - mmput(mm); kfree(umem->pages); umem->pages = NULL; xdp_umem_unaccount_pages(umem); -out: kfree(umem); } @@ -357,7 +343,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if (size_chk < 0) return -EINVAL; - umem->pid = get_task_pid(current, PIDTYPE_PID); umem->address = (unsigned long)addr; umem->chunk_mask = ~((u64)chunk_size - 1); umem->size = size; @@ -373,7 +358,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) err = xdp_umem_account_pages(umem); if (err) - goto out; + return err; err = xdp_umem_pin_pages(umem); if (err) @@ -392,8 +377,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) out_account: xdp_umem_unaccount_pages(umem); -out: - put_pid(umem->pid); return err; } -- cgit From 86be36f6502c52ddb4b85938145324fd07332da1 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Fri, 15 Mar 2019 20:21:19 +0530 Subject: powerpc: bpf: Fix generation of load/store DW instructions Yauheni Kaliuta pointed out that PTR_TO_STACK store/load verifier test was failing on powerpc64 BE, and rightfully indicated that the PPC_LD() macro is not masking away the last two bits of the offset per the ISA, resulting in the generation of 'lwa' instruction instead of the intended 'ld' instruction. Segher also pointed out that we can't simply mask away the last two bits as that will result in loading/storing from/to a memory location that was not intended. This patch addresses this by using ldx/stdx if the offset is not word-aligned. We load the offset into a temporary register (TMP_REG_2) and use that as the index register in a subsequent ldx/stdx. We fix PPC_LD() macro to mask off the last two bits, but enhance PPC_BPF_LL() and PPC_BPF_STL() to factor in the offset value and generate the proper instruction sequence. We also convert all existing users of PPC_LD() and PPC_STD() to use these macros. All existing uses of these macros have been audited to ensure that TMP_REG_2 can be clobbered. Fixes: 156d0e290e96 ("powerpc/ebpf/jit: Implement JIT compiler for extended BPF") Cc: stable@vger.kernel.org # v4.9+ Reported-by: Yauheni Kaliuta Signed-off-by: Naveen N. Rao Signed-off-by: Daniel Borkmann --- arch/powerpc/include/asm/ppc-opcode.h | 2 ++ arch/powerpc/net/bpf_jit.h | 17 +++++------------ arch/powerpc/net/bpf_jit32.h | 4 ++++ arch/powerpc/net/bpf_jit64.h | 20 ++++++++++++++++++++ arch/powerpc/net/bpf_jit_comp64.c | 12 ++++++------ 5 files changed, 37 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index f9513ad38fa6..e6c9ad088f45 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -302,6 +302,7 @@ /* Misc instructions for BPF compiler */ #define PPC_INST_LBZ 0x88000000 #define PPC_INST_LD 0xe8000000 +#define PPC_INST_LDX 0x7c00002a #define PPC_INST_LHZ 0xa0000000 #define PPC_INST_LWZ 0x80000000 #define PPC_INST_LHBRX 0x7c00062c @@ -309,6 +310,7 @@ #define PPC_INST_STB 0x98000000 #define PPC_INST_STH 0xb0000000 #define PPC_INST_STD 0xf8000000 +#define PPC_INST_STDX 0x7c00012a #define PPC_INST_STDU 0xf8000001 #define PPC_INST_STW 0x90000000 #define PPC_INST_STWU 0x94000000 diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 549e9490ff2a..dcac37745b05 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -51,6 +51,8 @@ #define PPC_LIS(r, i) PPC_ADDIS(r, 0, i) #define PPC_STD(r, base, i) EMIT(PPC_INST_STD | ___PPC_RS(r) | \ ___PPC_RA(base) | ((i) & 0xfffc)) +#define PPC_STDX(r, base, b) EMIT(PPC_INST_STDX | ___PPC_RS(r) | \ + ___PPC_RA(base) | ___PPC_RB(b)) #define PPC_STDU(r, base, i) EMIT(PPC_INST_STDU | ___PPC_RS(r) | \ ___PPC_RA(base) | ((i) & 0xfffc)) #define PPC_STW(r, base, i) EMIT(PPC_INST_STW | ___PPC_RS(r) | \ @@ -65,7 +67,9 @@ #define PPC_LBZ(r, base, i) EMIT(PPC_INST_LBZ | ___PPC_RT(r) | \ ___PPC_RA(base) | IMM_L(i)) #define PPC_LD(r, base, i) EMIT(PPC_INST_LD | ___PPC_RT(r) | \ - ___PPC_RA(base) | IMM_L(i)) + ___PPC_RA(base) | ((i) & 0xfffc)) +#define PPC_LDX(r, base, b) EMIT(PPC_INST_LDX | ___PPC_RT(r) | \ + ___PPC_RA(base) | ___PPC_RB(b)) #define PPC_LWZ(r, base, i) EMIT(PPC_INST_LWZ | ___PPC_RT(r) | \ ___PPC_RA(base) | IMM_L(i)) #define PPC_LHZ(r, base, i) EMIT(PPC_INST_LHZ | ___PPC_RT(r) | \ @@ -85,17 +89,6 @@ ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_BPF_STDCX(s, a, b) EMIT(PPC_INST_STDCX | ___PPC_RS(s) | \ ___PPC_RA(a) | ___PPC_RB(b)) - -#ifdef CONFIG_PPC64 -#define PPC_BPF_LL(r, base, i) do { PPC_LD(r, base, i); } while(0) -#define PPC_BPF_STL(r, base, i) do { PPC_STD(r, base, i); } while(0) -#define PPC_BPF_STLU(r, base, i) do { PPC_STDU(r, base, i); } while(0) -#else -#define PPC_BPF_LL(r, base, i) do { PPC_LWZ(r, base, i); } while(0) -#define PPC_BPF_STL(r, base, i) do { PPC_STW(r, base, i); } while(0) -#define PPC_BPF_STLU(r, base, i) do { PPC_STWU(r, base, i); } while(0) -#endif - #define PPC_CMPWI(a, i) EMIT(PPC_INST_CMPWI | ___PPC_RA(a) | IMM_L(i)) #define PPC_CMPDI(a, i) EMIT(PPC_INST_CMPDI | ___PPC_RA(a) | IMM_L(i)) #define PPC_CMPW(a, b) EMIT(PPC_INST_CMPW | ___PPC_RA(a) | \ diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h index 6f4daacad296..ade04547703f 100644 --- a/arch/powerpc/net/bpf_jit32.h +++ b/arch/powerpc/net/bpf_jit32.h @@ -123,6 +123,10 @@ DECLARE_LOAD_FUNC(sk_load_byte_msh); #define PPC_NTOHS_OFFS(r, base, i) PPC_LHZ_OFFS(r, base, i) #endif +#define PPC_BPF_LL(r, base, i) do { PPC_LWZ(r, base, i); } while(0) +#define PPC_BPF_STL(r, base, i) do { PPC_STW(r, base, i); } while(0) +#define PPC_BPF_STLU(r, base, i) do { PPC_STWU(r, base, i); } while(0) + #define SEEN_DATAREF 0x10000 /* might call external helpers */ #define SEEN_XREG 0x20000 /* X reg is used */ #define SEEN_MEM 0x40000 /* SEEN_MEM+(1< MAX_TAIL_CALL_CNT) * goto out; */ - PPC_LD(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); + PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx)); PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT); PPC_BCC(COND_GT, out); @@ -265,7 +265,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 /* prog = array->ptrs[index]; */ PPC_MULI(b2p[TMP_REG_1], b2p_index, 8); PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array); - PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs)); + PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs)); /* * if (prog == NULL) @@ -275,7 +275,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 PPC_BCC(COND_EQ, out); /* goto *(prog->bpf_func + prologue_size); */ - PPC_LD(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func)); + PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func)); #ifdef PPC64_ELF_ABI_v1 /* skip past the function descriptor */ PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], @@ -606,7 +606,7 @@ bpf_alu32_trunc: * the instructions generated will remain the * same across all passes */ - PPC_STD(dst_reg, 1, bpf_jit_stack_local(ctx)); + PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx)); PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx)); PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]); break; @@ -662,7 +662,7 @@ emit_clear: PPC_LI32(b2p[TMP_REG_1], imm); src_reg = b2p[TMP_REG_1]; } - PPC_STD(src_reg, dst_reg, off); + PPC_BPF_STL(src_reg, dst_reg, off); break; /* @@ -709,7 +709,7 @@ emit_clear: break; /* dst = *(u64 *)(ul) (src + off) */ case BPF_LDX | BPF_MEM | BPF_DW: - PPC_LD(dst_reg, src_reg, off); + PPC_BPF_LL(dst_reg, src_reg, off); break; /* -- cgit From 6ade657d6125ec3ec07f95fa51e28138aef6208f Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 14 Mar 2019 22:58:29 -0500 Subject: ALSA: echoaudio: add a check for ioremap_nocache In case ioremap_nocache fails, the fix releases chip and returns an error code upstream to avoid NULL pointer dereference. Signed-off-by: Kangjie Lu Signed-off-by: Takashi Iwai --- sound/pci/echoaudio/echoaudio.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/pci/echoaudio/echoaudio.c b/sound/pci/echoaudio/echoaudio.c index ea876b0b02b9..dc0084dc8550 100644 --- a/sound/pci/echoaudio/echoaudio.c +++ b/sound/pci/echoaudio/echoaudio.c @@ -1952,6 +1952,11 @@ static int snd_echo_create(struct snd_card *card, } chip->dsp_registers = (volatile u32 __iomem *) ioremap_nocache(chip->dsp_registers_phys, sz); + if (!chip->dsp_registers) { + dev_err(chip->card->dev, "ioremap failed\n"); + snd_echo_free(chip); + return -ENOMEM; + } if (request_irq(pci->irq, snd_echo_interrupt, IRQF_SHARED, KBUILD_MODNAME, chip)) { -- cgit From dcd0feac9bab901d5739de51b3f69840851f8919 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 14 Mar 2019 23:04:14 -0500 Subject: ALSA: sb8: add a check for request_region In case request_region fails, the fix returns an error code to avoid NULL pointer dereference. Signed-off-by: Kangjie Lu Signed-off-by: Takashi Iwai --- sound/isa/sb/sb8.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/isa/sb/sb8.c b/sound/isa/sb/sb8.c index aa2a83eb81a9..dc27a480c2d9 100644 --- a/sound/isa/sb/sb8.c +++ b/sound/isa/sb/sb8.c @@ -111,6 +111,10 @@ static int snd_sb8_probe(struct device *pdev, unsigned int dev) /* block the 0x388 port to avoid PnP conflicts */ acard->fm_res = request_region(0x388, 4, "SoundBlaster FM"); + if (!acard->fm_res) { + err = -EBUSY; + goto _err; + } if (port[dev] != SNDRV_AUTO_PORT) { if ((err = snd_sbdsp_create(card, port[dev], irq[dev], -- cgit From 6f19893b644a9454d85e593b5e90914e7a72b7dd Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 14 Mar 2019 23:20:16 -0500 Subject: net: openvswitch: fix a NULL pointer dereference upcall is dereferenced even when genlmsg_put fails. The fix goto out to avoid the NULL pointer dereference in this case. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 6679e96ab1dc..45d1469308b0 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -448,6 +448,10 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family, 0, upcall_info->cmd); + if (!upcall) { + err = -EINVAL; + goto out; + } upcall->dp_ifindex = dp_ifindex; err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb); -- cgit From 0fff9bd47e1341b5c4db862cc39fc68ce45f165d Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Fri, 15 Mar 2019 01:11:22 -0500 Subject: net: openvswitch: fix missing checks for nla_nest_start nla_nest_start may fail and thus deserves a check. The fix returns -EMSGSIZE when it fails. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- net/openvswitch/datapath.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 45d1469308b0..9dd158ab51b3 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -464,6 +464,10 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, if (upcall_info->egress_tun_info) { nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY); + if (!nla) { + err = -EMSGSIZE; + goto out; + } err = ovs_nla_put_tunnel_info(user_skb, upcall_info->egress_tun_info); BUG_ON(err); @@ -472,6 +476,10 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, if (upcall_info->actions_len) { nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS); + if (!nla) { + err = -EMSGSIZE; + goto out; + } err = ovs_nla_put_actions(upcall_info->actions, upcall_info->actions_len, user_skb); -- cgit From 07660ca679da3007d3231938e2dfb415d3440716 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Fri, 15 Mar 2019 01:14:33 -0500 Subject: net: ncsi: fix a missing check for nla_nest_start nla_nest_start may fail and thus deserves a check. The fix returns -EMSGSIZE in case it fails. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- net/ncsi/ncsi-netlink.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ncsi/ncsi-netlink.c b/net/ncsi/ncsi-netlink.c index 5d782445d2fc..bad17bba8ba7 100644 --- a/net/ncsi/ncsi-netlink.c +++ b/net/ncsi/ncsi-netlink.c @@ -251,6 +251,10 @@ static int ncsi_pkg_info_all_nl(struct sk_buff *skb, } attr = nla_nest_start(skb, NCSI_ATTR_PACKAGE_LIST); + if (!attr) { + rc = -EMSGSIZE; + goto err; + } rc = ncsi_write_package_info(skb, ndp, package->id); if (rc) { nla_nest_cancel(skb, attr); -- cgit From 4589e28db46ee4961edfd794c5bb43887d38c8e5 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Fri, 15 Mar 2019 12:11:59 -0500 Subject: net: tipc: fix a missing check of nla_nest_start nla_nest_start could fail and requires a check. The fix returns -EMSGSIZE if it fails. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- net/tipc/group.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/tipc/group.c b/net/tipc/group.c index 06fee142f09f..63f39201e41e 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -919,6 +919,9 @@ int tipc_group_fill_sock_diag(struct tipc_group *grp, struct sk_buff *skb) { struct nlattr *group = nla_nest_start(skb, TIPC_NLA_SOCK_GROUP); + if (!group) + return -EMSGSIZE; + if (nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_ID, grp->type) || nla_put_u32(skb, TIPC_NLA_SOCK_GROUP_INSTANCE, -- cgit From 9180bb4f046064dfa4541488102703b402bb04e1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 16 Mar 2019 13:09:53 -0700 Subject: tun: add a missing rcu_read_unlock() in error path In my latest patch I missed one rcu_read_unlock(), in case device is down. Fixes: 4477138fa0ae ("tun: properly test for IFF_UP") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- drivers/net/tun.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 0d343359f647..e9ca1c088d0b 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1960,6 +1960,7 @@ drop: rcu_read_lock(); if (unlikely(!(tun->dev->flags & IFF_UP))) { err = -EIO; + rcu_read_unlock(); goto drop; } -- cgit From 517ccc2aa50dbd7767a9eb8e1d9987a3ed7ced3e Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Sat, 16 Mar 2019 16:46:05 -0500 Subject: net: tipc: fix a missing check for nla_nest_start nla_nest_start may fail. The fix check its status and returns -EMSGSIZE in case it fails. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- net/tipc/socket.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3274ef625dba..d6b26862b34e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -3255,6 +3255,8 @@ static int __tipc_nl_add_sk_con(struct sk_buff *skb, struct tipc_sock *tsk) peer_port = tsk_peer_port(tsk); nest = nla_nest_start(skb, TIPC_NLA_SOCK_CON); + if (!nest) + return -EMSGSIZE; if (nla_put_u32(skb, TIPC_NLA_CON_NODE, peer_node)) goto msg_full; -- cgit From 2d012c65a9ca26a0ef87ea0a42f1653dd37155f5 Mon Sep 17 00:00:00 2001 From: Takashi Sakamoto Date: Sun, 17 Mar 2019 15:49:29 +0900 Subject: ALSA: firewire-motu: use 'version' field of unit directory to identify model Current ALSA firewire-motu driver uses the value of 'model' field of unit directory in configuration ROM for modalias for MOTU FireWire models. However, as long as I checked, Pre8 and 828mk3(Hybrid) have the same value for the field (=0x100800). unit | version | model --------------- | --------- | ---------- 828mkII | 0x000003 | 0x101800 Traveler | 0x000009 | 0x107800 Pre8 | 0x00000f | 0x100800 <- 828mk3(FW) | 0x000015 | 0x106800 AudioExpress | 0x000033 | 0x104800 828mk3(Hybrid) | 0x000035 | 0x100800 <- When updating firmware for MOTU 8pre FireWire from v1.0.0 to v1.0.3, I got change of the value from 0x100800 to 0x103800. On the other hand, the value of 'version' field is fixed to 0x00000f. As a quick glance, the higher 12 bits of the value of 'version' field represent firmware version, while the lower 12 bits is unknown. By induction, the value of 'version' field represents actual model. This commit changes modalias to match the value of 'version' field, instead of 'model' field. For degug, long name of added sound card includes hexadecimal value of 'model' field. Fixes: 6c5e1ac0e144 ("ALSA: firewire-motu: add support for Motu Traveler") Signed-off-by: Takashi Sakamoto Cc: # v4.19+ Signed-off-by: Takashi Iwai --- sound/firewire/motu/motu.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sound/firewire/motu/motu.c b/sound/firewire/motu/motu.c index 220e61926ea4..513291ba0ab0 100644 --- a/sound/firewire/motu/motu.c +++ b/sound/firewire/motu/motu.c @@ -36,7 +36,7 @@ static void name_card(struct snd_motu *motu) fw_csr_iterator_init(&it, motu->unit->directory); while (fw_csr_iterator_next(&it, &key, &val)) { switch (key) { - case CSR_VERSION: + case CSR_MODEL: version = val; break; } @@ -46,7 +46,7 @@ static void name_card(struct snd_motu *motu) strcpy(motu->card->shortname, motu->spec->name); strcpy(motu->card->mixername, motu->spec->name); snprintf(motu->card->longname, sizeof(motu->card->longname), - "MOTU %s (version:%d), GUID %08x%08x at %s, S%d", + "MOTU %s (version:%06x), GUID %08x%08x at %s, S%d", motu->spec->name, version, fw_dev->config_rom[3], fw_dev->config_rom[4], dev_name(&motu->unit->device), 100 << fw_dev->max_speed); @@ -237,20 +237,20 @@ static const struct snd_motu_spec motu_audio_express = { #define SND_MOTU_DEV_ENTRY(model, data) \ { \ .match_flags = IEEE1394_MATCH_VENDOR_ID | \ - IEEE1394_MATCH_MODEL_ID | \ - IEEE1394_MATCH_SPECIFIER_ID, \ + IEEE1394_MATCH_SPECIFIER_ID | \ + IEEE1394_MATCH_VERSION, \ .vendor_id = OUI_MOTU, \ - .model_id = model, \ .specifier_id = OUI_MOTU, \ + .version = model, \ .driver_data = (kernel_ulong_t)data, \ } static const struct ieee1394_device_id motu_id_table[] = { - SND_MOTU_DEV_ENTRY(0x101800, &motu_828mk2), - SND_MOTU_DEV_ENTRY(0x107800, &snd_motu_spec_traveler), - SND_MOTU_DEV_ENTRY(0x106800, &motu_828mk3), /* FireWire only. */ - SND_MOTU_DEV_ENTRY(0x100800, &motu_828mk3), /* Hybrid. */ - SND_MOTU_DEV_ENTRY(0x104800, &motu_audio_express), + SND_MOTU_DEV_ENTRY(0x000003, &motu_828mk2), + SND_MOTU_DEV_ENTRY(0x000009, &snd_motu_spec_traveler), + SND_MOTU_DEV_ENTRY(0x000015, &motu_828mk3), /* FireWire only. */ + SND_MOTU_DEV_ENTRY(0x000035, &motu_828mk3), /* Hybrid. */ + SND_MOTU_DEV_ENTRY(0x000033, &motu_audio_express), { } }; MODULE_DEVICE_TABLE(ieee1394, motu_id_table); -- cgit From 41c8d0adf3c4df1867d98cee4a2c4531352a33ad Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 12 Mar 2019 16:44:28 +0200 Subject: auxdisplay: hd44780: Fix memory leak on ->remove() We have to free on ->remove() the allocated resources on ->probe(). Fixes: d47d88361fee ("auxdisplay: Add HD44780 Character LCD support") Reviewed-by: Geert Uytterhoeven Signed-off-by: Andy Shevchenko Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/hd44780.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/auxdisplay/hd44780.c b/drivers/auxdisplay/hd44780.c index 9ad93ea42fdc..3cde351fb5c9 100644 --- a/drivers/auxdisplay/hd44780.c +++ b/drivers/auxdisplay/hd44780.c @@ -280,6 +280,8 @@ static int hd44780_remove(struct platform_device *pdev) struct charlcd *lcd = platform_get_drvdata(pdev); charlcd_unregister(lcd); + + kfree(lcd); return 0; } -- cgit From b658a2113ba4d4b99e2a57926379b0c0b0c648ab Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 12 Mar 2019 16:44:29 +0200 Subject: auxdisplay: charlcd: Move to_priv() to charlcd namespace In order to be more particular in names, rename to_priv() macro to charlcd_to_priv(). No functional change intended. Reviewed-by: Geert Uytterhoeven Signed-off-by: Andy Shevchenko Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/charlcd.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c index 60e0b772673f..407acd22efa8 100644 --- a/drivers/auxdisplay/charlcd.c +++ b/drivers/auxdisplay/charlcd.c @@ -91,7 +91,7 @@ struct charlcd_priv { unsigned long long drvdata[0]; }; -#define to_priv(p) container_of(p, struct charlcd_priv, lcd) +#define charlcd_to_priv(p) container_of(p, struct charlcd_priv, lcd) /* Device single-open policy control */ static atomic_t charlcd_available = ATOMIC_INIT(1); @@ -105,7 +105,7 @@ static void long_sleep(int ms) /* turn the backlight on or off */ static void charlcd_backlight(struct charlcd *lcd, int on) { - struct charlcd_priv *priv = to_priv(lcd); + struct charlcd_priv *priv = charlcd_to_priv(lcd); if (!lcd->ops->backlight) return; @@ -134,7 +134,7 @@ static void charlcd_bl_off(struct work_struct *work) /* turn the backlight on for a little while */ void charlcd_poke(struct charlcd *lcd) { - struct charlcd_priv *priv = to_priv(lcd); + struct charlcd_priv *priv = charlcd_to_priv(lcd); if (!lcd->ops->backlight) return; @@ -152,7 +152,7 @@ EXPORT_SYMBOL_GPL(charlcd_poke); static void charlcd_gotoxy(struct charlcd *lcd) { - struct charlcd_priv *priv = to_priv(lcd); + struct charlcd_priv *priv = charlcd_to_priv(lcd); unsigned int addr; /* @@ -170,7 +170,7 @@ static void charlcd_gotoxy(struct charlcd *lcd) static void charlcd_home(struct charlcd *lcd) { - struct charlcd_priv *priv = to_priv(lcd); + struct charlcd_priv *priv = charlcd_to_priv(lcd); priv->addr.x = 0; priv->addr.y = 0; @@ -179,7 +179,7 @@ static void charlcd_home(struct charlcd *lcd) static void charlcd_print(struct charlcd *lcd, char c) { - struct charlcd_priv *priv = to_priv(lcd); + struct charlcd_priv *priv = charlcd_to_priv(lcd); if (priv->addr.x < lcd->bwidth) { if (lcd->char_conv) @@ -211,7 +211,7 @@ static void charlcd_clear_fast(struct charlcd *lcd) /* clears the display and resets X/Y */ static void charlcd_clear_display(struct charlcd *lcd) { - struct charlcd_priv *priv = to_priv(lcd); + struct charlcd_priv *priv = charlcd_to_priv(lcd); lcd->ops->write_cmd(lcd, LCD_CMD_DISPLAY_CLEAR); priv->addr.x = 0; @@ -223,7 +223,7 @@ static void charlcd_clear_display(struct charlcd *lcd) static int charlcd_init_display(struct charlcd *lcd) { void (*write_cmd_raw)(struct charlcd *lcd, int cmd); - struct charlcd_priv *priv = to_priv(lcd); + struct charlcd_priv *priv = charlcd_to_priv(lcd); u8 init; if (lcd->ifwidth != 4 && lcd->ifwidth != 8) @@ -369,7 +369,7 @@ static bool parse_xy(const char *s, unsigned long *x, unsigned long *y) static inline int handle_lcd_special_code(struct charlcd *lcd) { - struct charlcd_priv *priv = to_priv(lcd); + struct charlcd_priv *priv = charlcd_to_priv(lcd); /* LCD special codes */ @@ -580,7 +580,7 @@ static inline int handle_lcd_special_code(struct charlcd *lcd) static void charlcd_write_char(struct charlcd *lcd, char c) { - struct charlcd_priv *priv = to_priv(lcd); + struct charlcd_priv *priv = charlcd_to_priv(lcd); /* first, we'll test if we're in escape mode */ if ((c != '\n') && priv->esc_seq.len >= 0) { @@ -705,7 +705,7 @@ static ssize_t charlcd_write(struct file *file, const char __user *buf, static int charlcd_open(struct inode *inode, struct file *file) { - struct charlcd_priv *priv = to_priv(the_charlcd); + struct charlcd_priv *priv = charlcd_to_priv(the_charlcd); int ret; ret = -EBUSY; @@ -766,7 +766,7 @@ static void charlcd_puts(struct charlcd *lcd, const char *s) /* initialize the LCD driver */ static int charlcd_init(struct charlcd *lcd) { - struct charlcd_priv *priv = to_priv(lcd); + struct charlcd_priv *priv = charlcd_to_priv(lcd); int ret; if (lcd->ops->backlight) { @@ -866,7 +866,7 @@ EXPORT_SYMBOL_GPL(charlcd_register); int charlcd_unregister(struct charlcd *lcd) { - struct charlcd_priv *priv = to_priv(lcd); + struct charlcd_priv *priv = charlcd_to_priv(lcd); unregister_reboot_notifier(&panel_notifier); charlcd_puts(lcd, "\x0cLCD driver unloaded.\x1b[Lc\x1b[Lb\x1b[L-"); -- cgit From 8e44fc85060ec997e9c6f3c49a04274db6621d26 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 12 Mar 2019 16:44:30 +0200 Subject: auxdisplay: charlcd: Introduce charlcd_free() helper The charlcd_free() is a counterpart to charlcd_alloc() and should be called symmetrically on tear down. Reviewed-by: Geert Uytterhoeven Signed-off-by: Andy Shevchenko Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/charlcd.c | 6 ++++++ include/misc/charlcd.h | 1 + 2 files changed, 7 insertions(+) diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c index 407acd22efa8..a351a9400054 100644 --- a/drivers/auxdisplay/charlcd.c +++ b/drivers/auxdisplay/charlcd.c @@ -818,6 +818,12 @@ struct charlcd *charlcd_alloc(unsigned int drvdata_size) } EXPORT_SYMBOL_GPL(charlcd_alloc); +void charlcd_free(struct charlcd *lcd) +{ + kfree(charlcd_to_priv(lcd)); +} +EXPORT_SYMBOL_GPL(charlcd_free); + static int panel_notify_sys(struct notifier_block *this, unsigned long code, void *unused) { diff --git a/include/misc/charlcd.h b/include/misc/charlcd.h index 23f61850f363..1832402324ce 100644 --- a/include/misc/charlcd.h +++ b/include/misc/charlcd.h @@ -35,6 +35,7 @@ struct charlcd_ops { }; struct charlcd *charlcd_alloc(unsigned int drvdata_size); +void charlcd_free(struct charlcd *lcd); int charlcd_register(struct charlcd *lcd); int charlcd_unregister(struct charlcd *lcd); -- cgit From 9b11d63966fc70b5496013bb211fa9025cd1ad61 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 12 Mar 2019 16:44:31 +0200 Subject: auxdisplay: panel: Convert to use charlcd_free() Convert to use charlcd_free() instead of kfree() for sake of type check. Reviewed-by: Geert Uytterhoeven Signed-off-by: Andy Shevchenko Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/panel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/auxdisplay/panel.c b/drivers/auxdisplay/panel.c index 21b9b2f2470a..e06de63497cf 100644 --- a/drivers/auxdisplay/panel.c +++ b/drivers/auxdisplay/panel.c @@ -1620,7 +1620,7 @@ err_lcd_unreg: if (lcd.enabled) charlcd_unregister(lcd.charlcd); err_unreg_device: - kfree(lcd.charlcd); + charlcd_free(lcd.charlcd); lcd.charlcd = NULL; parport_unregister_device(pprt); pprt = NULL; @@ -1647,7 +1647,7 @@ static void panel_detach(struct parport *port) if (lcd.enabled) { charlcd_unregister(lcd.charlcd); lcd.initialized = false; - kfree(lcd.charlcd); + charlcd_free(lcd.charlcd); lcd.charlcd = NULL; } -- cgit From cb79eb95c56fe6afe2baf1df01f22b1bed3f6060 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 12 Mar 2019 16:44:32 +0200 Subject: auxdisplay: hd44780: Convert to use charlcd_free() Convert to use charlcd_free() instead of kfree() for sake of type check. Reviewed-by: Geert Uytterhoeven Signed-off-by: Andy Shevchenko Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/hd44780.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/auxdisplay/hd44780.c b/drivers/auxdisplay/hd44780.c index 3cde351fb5c9..ab15b64707ad 100644 --- a/drivers/auxdisplay/hd44780.c +++ b/drivers/auxdisplay/hd44780.c @@ -271,7 +271,7 @@ static int hd44780_probe(struct platform_device *pdev) return 0; fail: - kfree(lcd); + charlcd_free(lcd); return ret; } @@ -281,7 +281,7 @@ static int hd44780_remove(struct platform_device *pdev) charlcd_unregister(lcd); - kfree(lcd); + charlcd_free(lcd); return 0; } -- cgit From 24c764abfd0d4b6e8e33c3818b668edbb4936d6f Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Fri, 1 Mar 2019 18:48:14 +0000 Subject: auxdisplay: deconfuse configuration The auxdisplay Kconfig is confusing. It creates two separate menus even though the settings are closely related. Moreover, the options for setting the boot message depend on CONFIG_PARPORT even though they are used by drivers that do not. Clear up the confusion by moving the "Parallel port LCD/Keypad" menu under auxdisplay where it logically belongs. Change the boot message options to depend only on CONFIG_CHARLCD, making them accessible also when only the HD44780 is selected. Since the "Parallel port LCD/Keypad" driver now has a new dependency on CONFIG_AUXDISPLAY, rename its Kconfig symbol and keep the old one such that make oldconfig will not disable the driver. Signed-off-by: Mans Rullgard Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/Kconfig | 17 ++++++++++++----- drivers/auxdisplay/Makefile | 2 +- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig index 57410f9c5d44..7d3fe27d6868 100644 --- a/drivers/auxdisplay/Kconfig +++ b/drivers/auxdisplay/Kconfig @@ -164,9 +164,7 @@ config ARM_CHARLCD line and the Linux version on the second line, but that's still useful. -endif # AUXDISPLAY - -menuconfig PANEL +menuconfig PARPORT_PANEL tristate "Parallel port LCD/Keypad Panel support" depends on PARPORT select CHARLCD @@ -178,7 +176,7 @@ menuconfig PANEL compiled as a module, or linked into the kernel and started at boot. If you don't understand what all this is about, say N. -if PANEL +if PARPORT_PANEL config PANEL_PARPORT int "Default parallel port number (0=LPT1)" @@ -419,8 +417,11 @@ config PANEL_LCD_PIN_BL Default for the 'BL' pin in custom profile is '0' (uncontrolled). +endif # PARPORT_PANEL + config PANEL_CHANGE_MESSAGE bool "Change LCD initialization message ?" + depends on CHARLCD default "n" ---help--- This allows you to replace the boot message indicating the kernel version @@ -444,7 +445,13 @@ config PANEL_BOOT_MESSAGE An empty message will only clear the display at driver init time. Any other printf()-formatted message is valid with newline and escape codes. -endif # PANEL +endif # AUXDISPLAY + +config PANEL + tristate "Parallel port LCD/Keypad Panel support (OLD OPTION)" + depends on PARPORT + select AUXDISPLAY + select PARPORT_PANEL config CHARLCD tristate "Character LCD core support" if COMPILE_TEST diff --git a/drivers/auxdisplay/Makefile b/drivers/auxdisplay/Makefile index 7ac6776ca3f6..cf54b5efb07e 100644 --- a/drivers/auxdisplay/Makefile +++ b/drivers/auxdisplay/Makefile @@ -10,4 +10,4 @@ obj-$(CONFIG_CFAG12864B) += cfag12864b.o cfag12864bfb.o obj-$(CONFIG_IMG_ASCII_LCD) += img-ascii-lcd.o obj-$(CONFIG_HD44780) += hd44780.o obj-$(CONFIG_HT16K33) += ht16k33.o -obj-$(CONFIG_PANEL) += panel.o +obj-$(CONFIG_PARPORT_PANEL) += panel.o -- cgit From c9171722459fdeab0f27790ae04c0c5a4ae5a9b2 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Fri, 1 Mar 2019 18:48:15 +0000 Subject: auxdisplay: charlcd: simplify init message display If CONFIG_PANEL_CHANGE_MESSAGE is set, CONFIG_PANEL_BOOT_MESSAGE will also be defined, so the double ifdef is pointless. Simplify the code further by using an intermediate macro rather duplicating most of the line. Signed-off-by: Mans Rullgard Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/charlcd.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c index a351a9400054..5212675564d7 100644 --- a/drivers/auxdisplay/charlcd.c +++ b/drivers/auxdisplay/charlcd.c @@ -763,6 +763,12 @@ static void charlcd_puts(struct charlcd *lcd, const char *s) } } +#ifdef CONFIG_PANEL_BOOT_MESSAGE +#define LCD_INIT_TEXT CONFIG_PANEL_BOOT_MESSAGE +#else +#define LCD_INIT_TEXT "Linux-" UTS_RELEASE "\n" +#endif + /* initialize the LCD driver */ static int charlcd_init(struct charlcd *lcd) { @@ -784,13 +790,8 @@ static int charlcd_init(struct charlcd *lcd) return ret; /* display a short message */ -#ifdef CONFIG_PANEL_CHANGE_MESSAGE -#ifdef CONFIG_PANEL_BOOT_MESSAGE - charlcd_puts(lcd, "\x1b[Lc\x1b[Lb\x1b[L*" CONFIG_PANEL_BOOT_MESSAGE); -#endif -#else - charlcd_puts(lcd, "\x1b[Lc\x1b[Lb\x1b[L*Linux-" UTS_RELEASE "\n"); -#endif + charlcd_puts(lcd, "\x1b[Lc\x1b[Lb\x1b[L*" LCD_INIT_TEXT); + /* clear the display on the next device opening */ priv->must_clear = true; charlcd_home(lcd); -- cgit From cc5d04d840d62d7c75e268c51da7cd0be2ee03c0 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Fri, 1 Mar 2019 18:48:16 +0000 Subject: auxdisplay: charlcd: make backlight initial state configurable The charlcd driver currently flashes the backlight once on init. This may not be desirable. Thus, add options for turning the backlight off or on as well. Signed-off-by: Mans Rullgard Signed-off-by: Miguel Ojeda --- drivers/auxdisplay/Kconfig | 21 +++++++++++++++++++++ drivers/auxdisplay/charlcd.c | 10 +++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/drivers/auxdisplay/Kconfig b/drivers/auxdisplay/Kconfig index 7d3fe27d6868..c52c738e554a 100644 --- a/drivers/auxdisplay/Kconfig +++ b/drivers/auxdisplay/Kconfig @@ -445,6 +445,27 @@ config PANEL_BOOT_MESSAGE An empty message will only clear the display at driver init time. Any other printf()-formatted message is valid with newline and escape codes. +choice + prompt "Backlight initial state" + default CHARLCD_BL_FLASH + + config CHARLCD_BL_OFF + bool "Off" + help + Backlight is initially turned off + + config CHARLCD_BL_ON + bool "On" + help + Backlight is initially turned on + + config CHARLCD_BL_FLASH + bool "Flash" + help + Backlight is flashed briefly on init + +endchoice + endif # AUXDISPLAY config PANEL diff --git a/drivers/auxdisplay/charlcd.c b/drivers/auxdisplay/charlcd.c index 5212675564d7..92745efefb54 100644 --- a/drivers/auxdisplay/charlcd.c +++ b/drivers/auxdisplay/charlcd.c @@ -769,6 +769,14 @@ static void charlcd_puts(struct charlcd *lcd, const char *s) #define LCD_INIT_TEXT "Linux-" UTS_RELEASE "\n" #endif +#ifdef CONFIG_CHARLCD_BL_ON +#define LCD_INIT_BL "\x1b[L+" +#elif defined(CONFIG_CHARLCD_BL_FLASH) +#define LCD_INIT_BL "\x1b[L*" +#else +#define LCD_INIT_BL "\x1b[L-" +#endif + /* initialize the LCD driver */ static int charlcd_init(struct charlcd *lcd) { @@ -790,7 +798,7 @@ static int charlcd_init(struct charlcd *lcd) return ret; /* display a short message */ - charlcd_puts(lcd, "\x1b[Lc\x1b[Lb\x1b[L*" LCD_INIT_TEXT); + charlcd_puts(lcd, "\x1b[Lc\x1b[Lb" LCD_INIT_BL LCD_INIT_TEXT); /* clear the display on the next device opening */ priv->must_clear = true; -- cgit From 6958d11f77d45db80f7e22a21a74d4d5f44dc667 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Sun, 17 Mar 2019 15:21:49 -0700 Subject: xfs: don't trip over uninitialized buffer on extent read of corrupted inode We've had rather rare reports of bmap btree block corruption where the bmap root block has a level count of zero. The root cause of the corruption is so far unknown. We do have verifier checks to detect this form of on-disk corruption, but this doesn't cover a memory corruption variant of the problem. The latter is a reasonable possibility because the root block is part of the inode fork and can reside in-core for some time before inode extents are read. If this occurs, it leads to a system crash such as the following: BUG: unable to handle kernel paging request at ffffffff00000221 PF error: [normal kernel read fault] ... RIP: 0010:xfs_trans_brelse+0xf/0x200 [xfs] ... Call Trace: xfs_iread_extents+0x379/0x540 [xfs] xfs_file_iomap_begin_delay+0x11a/0xb40 [xfs] ? xfs_attr_get+0xd1/0x120 [xfs] ? iomap_write_begin.constprop.40+0x2d0/0x2d0 xfs_file_iomap_begin+0x4c4/0x6d0 [xfs] ? __vfs_getxattr+0x53/0x70 ? iomap_write_begin.constprop.40+0x2d0/0x2d0 iomap_apply+0x63/0x130 ? iomap_write_begin.constprop.40+0x2d0/0x2d0 iomap_file_buffered_write+0x62/0x90 ? iomap_write_begin.constprop.40+0x2d0/0x2d0 xfs_file_buffered_aio_write+0xe4/0x3b0 [xfs] __vfs_write+0x150/0x1b0 vfs_write+0xba/0x1c0 ksys_pwrite64+0x64/0xa0 do_syscall_64+0x5a/0x1d0 entry_SYSCALL_64_after_hwframe+0x49/0xbe The crash occurs because xfs_iread_extents() attempts to release an uninitialized buffer pointer as the level == 0 value prevented the buffer from ever being allocated or read. Change the level > 0 assert to an explicit error check in xfs_iread_extents() to avoid crashing the kernel in the event of localized, in-core inode corruption. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/libxfs/xfs_bmap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 48502cb9990f..ae4c3b0d84db 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1191,7 +1191,10 @@ xfs_iread_extents( * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out. */ level = be16_to_cpu(block->bb_level); - ASSERT(level > 0); + if (unlikely(level == 0)) { + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); + return -EFSCORRUPTED; + } pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes); bno = be64_to_cpu(*pp); -- cgit From 8f3b487685b2acf71b42bb30d68fd9271bec8695 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 15 Mar 2019 11:37:20 +1000 Subject: drm/udl: use drm_gem_object_put_unlocked. When Daniel removed struct_mutex he didn't fix this call to the unlocked variant which is required since we no longer use struct mutex. This fixes a bunch of: WARNING: CPU: 4 PID: 1370 at drivers/gpu/drm/drm_gem.c:931 drm_gem_object_put+0x2b/0x30 [drm] Modules linked in: udl xt_CHECKSUM ipt_MASQUERADE tun bridge stp llc nf_conntrack_netbios_ns nf_conntrack_broadcast xt_CT ip6t> CPU: 4 PID: 1370 Comm: Xorg Not tainted 5.0.0+ #2 backtraces when you plug in a udl device. Fixes: ae358dacd217 (drm/udl: Get rid of dev->struct_mutex usage) Reviewed-by: Daniel Vetter Cc: Sean Paul Signed-off-by: Dave Airlie --- drivers/gpu/drm/udl/udl_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/udl/udl_gem.c b/drivers/gpu/drm/udl/udl_gem.c index d5a23295dd80..bb7b58407039 100644 --- a/drivers/gpu/drm/udl/udl_gem.c +++ b/drivers/gpu/drm/udl/udl_gem.c @@ -224,7 +224,7 @@ int udl_gem_mmap(struct drm_file *file, struct drm_device *dev, *offset = drm_vma_node_offset_addr(&gobj->base.vma_node); out: - drm_gem_object_put(&gobj->base); + drm_gem_object_put_unlocked(&gobj->base); unlock: mutex_unlock(&udl->gem_lock); return ret; -- cgit From 587443e7773e150ae29e643ee8f41a1eed226565 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 6 Mar 2019 19:17:56 +0200 Subject: IB/mlx4: Fix race condition between catas error reset and aliasguid flows Code review revealed a race condition which could allow the catas error flow to interrupt the alias guid query post mechanism at random points. Thiis is fixed by doing cancel_delayed_work_sync() instead of cancel_delayed_work() during the alias guid mechanism destroy flow. Fixes: a0c64a17aba8 ("mlx4: Add alias_guid mechanism") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx4/alias_GUID.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 782499abcd98..2a0b59a4b6eb 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -804,8 +804,8 @@ void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev) unsigned long flags; for (i = 0 ; i < dev->num_ports; i++) { - cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work); det = &sriov->alias_guid.ports_guid[i]; + cancel_delayed_work_sync(&det->alias_guid_work); spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags); while (!list_empty(&det->cb_list)) { cb_ctx = list_entry(det->cb_list.next, -- cgit From f84b66b9cce78e8f9d38204fdaa75f07c75f4911 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 6 Mar 2019 19:20:50 +0200 Subject: net/mlx5: Fix DCT creation bad flow In case the DCT creation command has succeeded a DRAIN must be issued before calling DESTROY. In addition, the original code used the wrong parameter for the DESTROY command, 'in' instead of 'din', which caused another creation try instead of destroying. Cc: # 4.15 Fixes: 57cda166bbe0 ("net/mlx5: Add DCT command interface") Signed-off-by: Yishai Hadas Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 66 +++++++++++++++------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index 370ca94b6775..c7c2920c05c4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -40,6 +40,9 @@ #include "mlx5_core.h" #include "lib/eq.h" +static int mlx5_core_drain_dct(struct mlx5_core_dev *dev, + struct mlx5_core_dct *dct); + static struct mlx5_core_rsc_common * mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn) { @@ -227,13 +230,42 @@ static void destroy_resource_common(struct mlx5_core_dev *dev, wait_for_completion(&qp->common.free); } +static int _mlx5_core_destroy_dct(struct mlx5_core_dev *dev, + struct mlx5_core_dct *dct, bool need_cleanup) +{ + u32 out[MLX5_ST_SZ_DW(destroy_dct_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_dct_in)] = {0}; + struct mlx5_core_qp *qp = &dct->mqp; + int err; + + err = mlx5_core_drain_dct(dev, dct); + if (err) { + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + goto destroy; + } else { + mlx5_core_warn( + dev, "failed drain DCT 0x%x with error 0x%x\n", + qp->qpn, err); + return err; + } + } + wait_for_completion(&dct->drained); +destroy: + if (need_cleanup) + destroy_resource_common(dev, &dct->mqp); + MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT); + MLX5_SET(destroy_dct_in, in, dctn, qp->qpn); + MLX5_SET(destroy_dct_in, in, uid, qp->uid); + err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in), + (void *)&out, sizeof(out)); + return err; +} + int mlx5_core_create_dct(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct, u32 *in, int inlen) { u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {0}; - u32 din[MLX5_ST_SZ_DW(destroy_dct_in)] = {0}; - u32 dout[MLX5_ST_SZ_DW(destroy_dct_out)] = {0}; struct mlx5_core_qp *qp = &dct->mqp; int err; @@ -254,11 +286,7 @@ int mlx5_core_create_dct(struct mlx5_core_dev *dev, return 0; err_cmd: - MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT); - MLX5_SET(destroy_dct_in, din, dctn, qp->qpn); - MLX5_SET(destroy_dct_in, din, uid, qp->uid); - mlx5_cmd_exec(dev, (void *)&in, sizeof(din), - (void *)&out, sizeof(dout)); + _mlx5_core_destroy_dct(dev, dct, false); return err; } EXPORT_SYMBOL_GPL(mlx5_core_create_dct); @@ -323,29 +351,7 @@ static int mlx5_core_drain_dct(struct mlx5_core_dev *dev, int mlx5_core_destroy_dct(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct) { - u32 out[MLX5_ST_SZ_DW(destroy_dct_out)] = {0}; - u32 in[MLX5_ST_SZ_DW(destroy_dct_in)] = {0}; - struct mlx5_core_qp *qp = &dct->mqp; - int err; - - err = mlx5_core_drain_dct(dev, dct); - if (err) { - if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { - goto destroy; - } else { - mlx5_core_warn(dev, "failed drain DCT 0x%x with error 0x%x\n", qp->qpn, err); - return err; - } - } - wait_for_completion(&dct->drained); -destroy: - destroy_resource_common(dev, &dct->mqp); - MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT); - MLX5_SET(destroy_dct_in, in, dctn, qp->qpn); - MLX5_SET(destroy_dct_in, in, uid, qp->uid); - err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in), - (void *)&out, sizeof(out)); - return err; + return _mlx5_core_destroy_dct(dev, dct, true); } EXPORT_SYMBOL_GPL(mlx5_core_destroy_dct); -- cgit From c5ae1954c47d3fd8815bd5a592aba18702c93f33 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Wed, 6 Mar 2019 19:21:42 +0200 Subject: IB/mlx5: Use mlx5 core to create/destroy a DEVX DCT To prevent a hardware memory leak when a DEVX DCT object is destroyed without calling DRAIN DCT before, (e.g. under cleanup flow), need to manage its creation and destruction via mlx5 core. In that case the DRAIN DCT command will be called and only once that it will be completed the DESTROY DCT command will be called. Otherwise, the DESTROY DCT may fail and a hardware leak may occur. As of that change the DRAIN DCT command should not be exposed any more from DEVX, it's managed internally by the driver to work as expected by the device specification. Fixes: 7efce3691d33 ("IB/mlx5: Add obj create and destroy functionality") Signed-off-by: Yishai Hadas Reviewed-by: Artemy Kovalyov Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/devx.c | 34 +++++++++++++++++++++------- drivers/infiniband/hw/mlx5/qp.c | 4 +++- drivers/net/ethernet/mellanox/mlx5/core/qp.c | 6 ++--- include/linux/mlx5/qp.h | 3 ++- 4 files changed, 34 insertions(+), 13 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c index eaa055007f28..9e08df7914aa 100644 --- a/drivers/infiniband/hw/mlx5/devx.c +++ b/drivers/infiniband/hw/mlx5/devx.c @@ -20,6 +20,7 @@ enum devx_obj_flags { DEVX_OBJ_FLAGS_INDIRECT_MKEY = 1 << 0, + DEVX_OBJ_FLAGS_DCT = 1 << 1, }; struct devx_async_data { @@ -39,7 +40,10 @@ struct devx_obj { u32 dinlen; /* destroy inbox length */ u32 dinbox[MLX5_MAX_DESTROY_INBOX_SIZE_DW]; u32 flags; - struct mlx5_ib_devx_mr devx_mr; + union { + struct mlx5_ib_devx_mr devx_mr; + struct mlx5_core_dct core_dct; + }; }; struct devx_umem { @@ -347,7 +351,6 @@ static u64 devx_get_obj_id(const void *in) obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_RQ, MLX5_GET(arm_rq_in, in, srq_number)); break; - case MLX5_CMD_OP_DRAIN_DCT: case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: obj_id = get_enc_obj_id(MLX5_CMD_OP_CREATE_DCT, MLX5_GET(drain_dct_in, in, dctn)); @@ -618,7 +621,6 @@ static bool devx_is_obj_modify_cmd(const void *in) case MLX5_CMD_OP_2RST_QP: case MLX5_CMD_OP_ARM_XRC_SRQ: case MLX5_CMD_OP_ARM_RQ: - case MLX5_CMD_OP_DRAIN_DCT: case MLX5_CMD_OP_ARM_DCT_FOR_KEY_VIOLATION: case MLX5_CMD_OP_ARM_XRQ: case MLX5_CMD_OP_SET_XRQ_DC_PARAMS_ENTRY: @@ -1124,7 +1126,11 @@ static int devx_obj_cleanup(struct ib_uobject *uobject, if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) devx_cleanup_mkey(obj); - ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); + if (obj->flags & DEVX_OBJ_FLAGS_DCT) + ret = mlx5_core_destroy_dct(obj->mdev, &obj->core_dct); + else + ret = mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, + sizeof(out)); if (ib_is_destroy_retryable(ret, why, uobject)) return ret; @@ -1185,9 +1191,17 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)( devx_set_umem_valid(cmd_in); } - err = mlx5_cmd_exec(dev->mdev, cmd_in, - cmd_in_len, - cmd_out, cmd_out_len); + if (opcode == MLX5_CMD_OP_CREATE_DCT) { + obj->flags |= DEVX_OBJ_FLAGS_DCT; + err = mlx5_core_create_dct(dev->mdev, &obj->core_dct, + cmd_in, cmd_in_len, + cmd_out, cmd_out_len); + } else { + err = mlx5_cmd_exec(dev->mdev, cmd_in, + cmd_in_len, + cmd_out, cmd_out_len); + } + if (err) goto obj_free; @@ -1214,7 +1228,11 @@ err_copy: if (obj->flags & DEVX_OBJ_FLAGS_INDIRECT_MKEY) devx_cleanup_mkey(obj); obj_destroy: - mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, sizeof(out)); + if (obj->flags & DEVX_OBJ_FLAGS_DCT) + mlx5_core_destroy_dct(obj->mdev, &obj->core_dct); + else + mlx5_cmd_exec(obj->mdev, obj->dinbox, obj->dinlen, out, + sizeof(out)); obj_free: kfree(obj); return err; diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 6b1f0e76900b..7cd006da1dae 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3729,6 +3729,7 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, } else if (cur_state == IB_QPS_INIT && new_state == IB_QPS_RTR) { struct mlx5_ib_modify_qp_resp resp = {}; + u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {0}; u32 min_resp_len = offsetof(typeof(resp), dctn) + sizeof(resp.dctn); @@ -3747,7 +3748,8 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr, MLX5_SET(dctc, dctc, hop_limit, attr->ah_attr.grh.hop_limit); err = mlx5_core_create_dct(dev->mdev, &qp->dct.mdct, qp->dct.in, - MLX5_ST_SZ_BYTES(create_dct_in)); + MLX5_ST_SZ_BYTES(create_dct_in), out, + sizeof(out)); if (err) return err; resp.dctn = qp->dct.mdct.mqp.qpn; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index c7c2920c05c4..b8ba74de9555 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -263,16 +263,16 @@ destroy: int mlx5_core_create_dct(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct, - u32 *in, int inlen) + u32 *in, int inlen, + u32 *out, int outlen) { - u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {0}; struct mlx5_core_qp *qp = &dct->mqp; int err; init_completion(&dct->drained); MLX5_SET(create_dct_in, in, opcode, MLX5_CMD_OP_CREATE_DCT); - err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, outlen); if (err) { mlx5_core_warn(dev, "create DCT failed, ret %d\n", err); return err; diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index b26ea9077384..0343c81d4c5f 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -557,7 +557,8 @@ static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev, int mlx5_core_create_dct(struct mlx5_core_dev *dev, struct mlx5_core_dct *qp, - u32 *in, int inlen); + u32 *in, int inlen, + u32 *out, int outlen); int mlx5_core_create_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, u32 *in, -- cgit From cd27287562d69629c5f007c6f64c27b6ff15f3e5 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Mon, 11 Mar 2019 14:35:58 +0200 Subject: IB/mlx5: Fix mapping of link-mode to IB width and speed Add mapping of link mode: CAUI4 100Gbps CR4/KR4 with 4 lines and 25Gbps. Fix mapping of link mode: GAUI2 50Gbps CR2/KR2 to be 2 lines with 25Gbps. Fixes: 08e8676f1607 ("IB/mlx5: Add support for 50Gbps per lane link modes") Signed-off-by: Aya Levin Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 994c19d01211..531ff20b32ad 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -415,10 +415,17 @@ static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed, *active_speed = IB_SPEED_EDR; break; case MLX5E_PROT_MASK(MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2): + *active_width = IB_WIDTH_2X; + *active_speed = IB_SPEED_EDR; + break; case MLX5E_PROT_MASK(MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR): *active_width = IB_WIDTH_1X; *active_speed = IB_SPEED_HDR; break; + case MLX5E_PROT_MASK(MLX5E_CAUI_4_100GBASE_CR4_KR4): + *active_width = IB_WIDTH_4X; + *active_speed = IB_SPEED_EDR; + break; case MLX5E_PROT_MASK(MLX5E_100GAUI_2_100GBASE_CR2_KR2): *active_width = IB_WIDTH_2X; *active_speed = IB_SPEED_HDR; -- cgit From ec4fe4bcc584b55e24e8d1768f5510a62c0fd619 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Thu, 14 Mar 2019 18:37:29 +0800 Subject: i40iw: Avoid panic when handling the inetdev event There is a panic reported that on a system with x722 ethernet, when doing the operations like: # ip link add br0 type bridge # ip link set eno1 master br0 # systemctl restart systemd-networkd The system will panic "BUG: unable to handle kernel null pointer dereference at 0000000000000034", with call chain: i40iw_inetaddr_event notifier_call_chain blocking_notifier_call_chain notifier_call_chain __inet_del_ifa inet_rtm_deladdr rtnetlink_rcv_msg netlink_rcv_skb rtnetlink_rcv netlink_unicast netlink_sendmsg sock_sendmsg __sys_sendto It is caused by "local_ipaddr = ntohl(in->ifa_list->ifa_address)", while the in->ifa_list is NULL. So add a check for the "in->ifa_list == NULL" case, and skip the ARP operation accordingly. Signed-off-by: Feng Tang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/i40iw/i40iw_utils.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/i40iw/i40iw_utils.c b/drivers/infiniband/hw/i40iw/i40iw_utils.c index c5a881172524..337410f40860 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_utils.c +++ b/drivers/infiniband/hw/i40iw/i40iw_utils.c @@ -173,7 +173,12 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, rcu_read_lock(); in = __in_dev_get_rcu(upper_dev); - local_ipaddr = ntohl(in->ifa_list->ifa_address); + + if (!in->ifa_list) + local_ipaddr = 0; + else + local_ipaddr = ntohl(in->ifa_list->ifa_address); + rcu_read_unlock(); } else { local_ipaddr = ntohl(ifa->ifa_address); @@ -185,6 +190,11 @@ int i40iw_inetaddr_event(struct notifier_block *notifier, case NETDEV_UP: /* Fall through */ case NETDEV_CHANGEADDR: + + /* Just skip if no need to handle ARP cache */ + if (!local_ipaddr) + break; + i40iw_manage_arp_cache(iwdev, netdev->dev_addr, &local_ipaddr, -- cgit From 65e9a6d25deb752d97b88335068dc0e7accbc9c3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 17 Mar 2019 17:17:45 -0700 Subject: networking: fix snmp_counter.rst Doc. Warnings Fix documentation markup warnings in snmp_counter.rst: Documentation/networking/snmp_counter.rst:416: WARNING: Title underline too short. Documentation/networking/snmp_counter.rst:684: WARNING: Bullet list ends without a blank line; unexpected unindent. Documentation/networking/snmp_counter.rst:693: WARNING: Title underline too short. Documentation/networking/snmp_counter.rst:707: WARNING: Bullet list ends without a blank line; unexpected unindent. Documentation/networking/snmp_counter.rst:712: WARNING: Bullet list ends without a blank line; unexpected unindent. Documentation/networking/snmp_counter.rst:722: WARNING: Title underline too short. Documentation/networking/snmp_counter.rst:733: WARNING: Bullet list ends without a blank line; unexpected unindent. Documentation/networking/snmp_counter.rst:736: WARNING: Bullet list ends without a blank line; unexpected unindent. Documentation/networking/snmp_counter.rst:739: WARNING: Bullet list ends without a blank line; unexpected unindent. Fixes: 80cc49507ba48 ("net: Add part of TCP counts explanations in snmp_counters.rst") Fixes: 8e2ea53a83dfb ("add snmp counters document") Fixes: a6c7c7aac2de6 ("net: add document for several snmp counters") Signed-off-by: Randy Dunlap Cc: yupeng --- Documentation/networking/snmp_counter.rst | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/Documentation/networking/snmp_counter.rst b/Documentation/networking/snmp_counter.rst index 52b026be028f..38a4edc4522b 100644 --- a/Documentation/networking/snmp_counter.rst +++ b/Documentation/networking/snmp_counter.rst @@ -413,7 +413,7 @@ algorithm. .. _F-RTO: https://tools.ietf.org/html/rfc5682 TCP Fast Path -============ +============= When kernel receives a TCP packet, it has two paths to handler the packet, one is fast path, another is slow path. The comment in kernel code provides a good explanation of them, I pasted them below:: @@ -681,6 +681,7 @@ The TCP stack receives an out of order duplicate packet, so it sends a DSACK to the sender. * TcpExtTCPDSACKRecv + The TCP stack receives a DSACK, which indicates an acknowledged duplicate packet is received. @@ -690,7 +691,7 @@ The TCP stack receives a DSACK, which indicate an out of order duplicate packet is received. invalid SACK and DSACK -==================== +====================== When a SACK (or DSACK) block is invalid, a corresponding counter would be updated. The validation method is base on the start/end sequence number of the SACK block. For more details, please refer the comment @@ -704,11 +705,13 @@ explaination: .. _Add counters for discarded SACK blocks: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=18f02545a9a16c9a89778b91a162ad16d510bb32 * TcpExtTCPSACKDiscard + This counter indicates how many SACK blocks are invalid. If the invalid SACK block is caused by ACK recording, the TCP stack will only ignore it and won't update this counter. * TcpExtTCPDSACKIgnoredOld and TcpExtTCPDSACKIgnoredNoUndo + When a DSACK block is invalid, one of these two counters would be updated. Which counter will be updated depends on the undo_marker flag of the TCP socket. If the undo_marker is not set, the TCP stack isn't @@ -719,7 +722,7 @@ will be updated. If the undo_marker is set, TcpExtTCPDSACKIgnoredOld will be updated. As implied in its name, it might be an old packet. SACK shift -========= +========== The linux networking stack stores data in sk_buff struct (skb for short). If a SACK block acrosses multiple skb, the TCP stack will try to re-arrange data in these skb. E.g. if a SACK block acknowledges seq @@ -730,12 +733,15 @@ seq 14 to 20. All data in skb2 will be moved to skb1, and skb2 will be discard, this operation is 'merge'. * TcpExtTCPSackShifted + A skb is shifted * TcpExtTCPSackMerged + A skb is merged * TcpExtTCPSackShiftFallback + A skb should be shifted or merged, but the TCP stack doesn't do it for some reasons. -- cgit From ea239314fe42ace880bdd834256834679346c80e Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Sun, 17 Mar 2019 18:46:42 +0100 Subject: tipc: allow service ranges to be connect()'ed on RDM/DGRAM We move the check that prevents connecting service ranges to after the RDM/DGRAM check, and move address sanity control to a separate function that also validates the service range. Fixes: 23998835be98 ("tipc: improve address sanity check in tipc_connect()") Signed-off-by: Erik Hugne Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index d6b26862b34e..b542f14ed444 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2349,6 +2349,16 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) return 0; } +static bool tipc_sockaddr_is_sane(struct sockaddr_tipc *addr) +{ + if (addr->family != AF_TIPC) + return false; + if (addr->addrtype == TIPC_SERVICE_RANGE) + return (addr->addr.nameseq.lower <= addr->addr.nameseq.upper); + return (addr->addrtype == TIPC_SERVICE_ADDR || + addr->addrtype == TIPC_SOCKET_ADDR); +} + /** * tipc_connect - establish a connection to another TIPC port * @sock: socket structure @@ -2384,18 +2394,18 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, if (!tipc_sk_type_connectionless(sk)) res = -EINVAL; goto exit; - } else if (dst->family != AF_TIPC) { - res = -EINVAL; } - if (dst->addrtype != TIPC_ADDR_ID && dst->addrtype != TIPC_ADDR_NAME) + if (!tipc_sockaddr_is_sane(dst)) { res = -EINVAL; - if (res) goto exit; - + } /* DGRAM/RDM connect(), just save the destaddr */ if (tipc_sk_type_connectionless(sk)) { memcpy(&tsk->peer, dest, destlen); goto exit; + } else if (dst->addrtype == TIPC_SERVICE_RANGE) { + res = -EINVAL; + goto exit; } previous = sk->sk_state; -- cgit From cd1b772d4881d1cd15b90ec17aab9ac7950e8850 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Mon, 29 Oct 2018 16:32:31 +0100 Subject: driver core: remove BUS_ATTR() There are now no in-kernel users of BUS_ATTR() so drop it from device.h Everyone should use BUS_ATTR_RO/RW/WO() from now on. Cc: "Rafael J. Wysocki" Signed-off-by: Greg Kroah-Hartman --- include/linux/device.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/include/linux/device.h b/include/linux/device.h index b425a7ee04ce..4e6987e11f68 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -49,8 +49,6 @@ struct bus_attribute { ssize_t (*store)(struct bus_type *bus, const char *buf, size_t count); }; -#define BUS_ATTR(_name, _mode, _show, _store) \ - struct bus_attribute bus_attr_##_name = __ATTR(_name, _mode, _show, _store) #define BUS_ATTR_RW(_name) \ struct bus_attribute bus_attr_##_name = __ATTR_RW(_name) #define BUS_ATTR_RO(_name) \ -- cgit From bd31342f0046077e92062a6c09eae6c8f1676916 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 12 Mar 2019 10:09:37 +1100 Subject: staging: remove mt7621-eth driver/net/ethernet/mediatek/ now supports this hardware, so we don't need a separate driver. Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/staging/Kconfig | 2 - drivers/staging/Makefile | 1 - .../devicetree/bindings/net/mediatek-net-gsw.txt | 48 - drivers/staging/mt7621-eth/Kconfig | 39 - drivers/staging/mt7621-eth/Makefile | 14 - drivers/staging/mt7621-eth/TODO | 13 - drivers/staging/mt7621-eth/ethtool.c | 250 --- drivers/staging/mt7621-eth/ethtool.h | 15 - drivers/staging/mt7621-eth/gsw_mt7620.h | 277 --- drivers/staging/mt7621-eth/gsw_mt7621.c | 297 --- drivers/staging/mt7621-eth/mdio.c | 275 --- drivers/staging/mt7621-eth/mdio.h | 27 - drivers/staging/mt7621-eth/mdio_mt7620.c | 173 -- drivers/staging/mt7621-eth/mtk_eth_soc.c | 2176 -------------------- drivers/staging/mt7621-eth/mtk_eth_soc.h | 716 ------- drivers/staging/mt7621-eth/soc_mt7621.c | 161 -- 16 files changed, 4484 deletions(-) delete mode 100644 drivers/staging/mt7621-eth/Documentation/devicetree/bindings/net/mediatek-net-gsw.txt delete mode 100644 drivers/staging/mt7621-eth/Kconfig delete mode 100644 drivers/staging/mt7621-eth/Makefile delete mode 100644 drivers/staging/mt7621-eth/TODO delete mode 100644 drivers/staging/mt7621-eth/ethtool.c delete mode 100644 drivers/staging/mt7621-eth/ethtool.h delete mode 100644 drivers/staging/mt7621-eth/gsw_mt7620.h delete mode 100644 drivers/staging/mt7621-eth/gsw_mt7621.c delete mode 100644 drivers/staging/mt7621-eth/mdio.c delete mode 100644 drivers/staging/mt7621-eth/mdio.h delete mode 100644 drivers/staging/mt7621-eth/mdio_mt7620.c delete mode 100644 drivers/staging/mt7621-eth/mtk_eth_soc.c delete mode 100644 drivers/staging/mt7621-eth/mtk_eth_soc.h delete mode 100644 drivers/staging/mt7621-eth/soc_mt7621.c diff --git a/drivers/staging/Kconfig b/drivers/staging/Kconfig index c0901b96cfe4..62951e836cbc 100644 --- a/drivers/staging/Kconfig +++ b/drivers/staging/Kconfig @@ -114,8 +114,6 @@ source "drivers/staging/ralink-gdma/Kconfig" source "drivers/staging/mt7621-mmc/Kconfig" -source "drivers/staging/mt7621-eth/Kconfig" - source "drivers/staging/mt7621-dts/Kconfig" source "drivers/staging/gasket/Kconfig" diff --git a/drivers/staging/Makefile b/drivers/staging/Makefile index 57c6bce13ff4..d1b17ddcd354 100644 --- a/drivers/staging/Makefile +++ b/drivers/staging/Makefile @@ -47,7 +47,6 @@ obj-$(CONFIG_SPI_MT7621) += mt7621-spi/ obj-$(CONFIG_SOC_MT7621) += mt7621-dma/ obj-$(CONFIG_DMA_RALINK) += ralink-gdma/ obj-$(CONFIG_MTK_MMC) += mt7621-mmc/ -obj-$(CONFIG_NET_MEDIATEK_SOC_STAGING) += mt7621-eth/ obj-$(CONFIG_SOC_MT7621) += mt7621-dts/ obj-$(CONFIG_STAGING_GASKET_FRAMEWORK) += gasket/ obj-$(CONFIG_XIL_AXIS_FIFO) += axis-fifo/ diff --git a/drivers/staging/mt7621-eth/Documentation/devicetree/bindings/net/mediatek-net-gsw.txt b/drivers/staging/mt7621-eth/Documentation/devicetree/bindings/net/mediatek-net-gsw.txt deleted file mode 100644 index 596b38552697..000000000000 --- a/drivers/staging/mt7621-eth/Documentation/devicetree/bindings/net/mediatek-net-gsw.txt +++ /dev/null @@ -1,48 +0,0 @@ -Mediatek Gigabit Switch -======================= - -The mediatek gigabit switch can be found on Mediatek SoCs. - -Required properties: -- compatible: Should be "mediatek,mt7620-gsw", "mediatek,mt7621-gsw", - "mediatek,mt7623-gsw" -- reg: Address and length of the register set for the device -- interrupts: Should contain the gigabit switches interrupt - - -Additional required properties for ARM based SoCs: -- mediatek,reset-pin: phandle describing the reset GPIO -- clocks: the clocks used by the switch -- clock-names: the names of the clocks listed in the clocks property - these should be "trgpll", "esw", "gp2", "gp1" -- mt7530-supply: the phandle of the regulator used to power the switch -- mediatek,pctl-regmap: phandle to the port control regmap. this is used to - setup the drive current - - -Optional properties: -- interrupt-parent: Should be the phandle for the interrupt controller - that services interrupts for this device - -Example: - -gsw: switch@1b100000 { - compatible = "mediatek,mt7623-gsw"; - reg = <0 0x1b110000 0 0x300000>; - - interrupt-parent = <&pio>; - interrupts = <168 IRQ_TYPE_EDGE_RISING>; - - clocks = <&apmixedsys CLK_APMIXED_TRGPLL>, - <ðsys CLK_ETHSYS_ESW>, - <ðsys CLK_ETHSYS_GP2>, - <ðsys CLK_ETHSYS_GP1>; - clock-names = "trgpll", "esw", "gp2", "gp1"; - - mt7530-supply = <&mt6323_vpa_reg>; - - mediatek,pctl-regmap = <&syscfg_pctl_a>; - mediatek,reset-pin = <&pio 15 0>; - - status = "okay"; -}; diff --git a/drivers/staging/mt7621-eth/Kconfig b/drivers/staging/mt7621-eth/Kconfig deleted file mode 100644 index 44ea86c7a96c..000000000000 --- a/drivers/staging/mt7621-eth/Kconfig +++ /dev/null @@ -1,39 +0,0 @@ -config NET_VENDOR_MEDIATEK_STAGING - bool "MediaTek ethernet driver - staging version" - depends on RALINK - ---help--- - If you have an MT7621 Mediatek SoC with ethernet, say Y. - -if NET_VENDOR_MEDIATEK_STAGING -choice - prompt "MAC type" - -config NET_MEDIATEK_MT7621 - bool "MT7621" - depends on MIPS && SOC_MT7621 - -endchoice - -config NET_MEDIATEK_SOC_STAGING - tristate "MediaTek SoC Gigabit Ethernet support" - depends on NET_VENDOR_MEDIATEK_STAGING - select PHYLIB - ---help--- - This driver supports the gigabit ethernet MACs in the - MediaTek SoC family. - -config NET_MEDIATEK_MDIO - def_bool NET_MEDIATEK_SOC_STAGING - depends on NET_MEDIATEK_MT7621 - select PHYLIB - -config NET_MEDIATEK_MDIO_MT7620 - def_bool NET_MEDIATEK_SOC_STAGING - depends on NET_MEDIATEK_MT7621 - select NET_MEDIATEK_MDIO - -config NET_MEDIATEK_GSW_MT7621 - def_tristate NET_MEDIATEK_SOC_STAGING - depends on NET_MEDIATEK_MT7621 - -endif #NET_VENDOR_MEDIATEK_STAGING diff --git a/drivers/staging/mt7621-eth/Makefile b/drivers/staging/mt7621-eth/Makefile deleted file mode 100644 index 018bcc3596b3..000000000000 --- a/drivers/staging/mt7621-eth/Makefile +++ /dev/null @@ -1,14 +0,0 @@ -# -# Makefile for the Ralink SoCs built-in ethernet macs -# - -mtk-eth-soc-y += mtk_eth_soc.o ethtool.o - -mtk-eth-soc-$(CONFIG_NET_MEDIATEK_MDIO) += mdio.o -mtk-eth-soc-$(CONFIG_NET_MEDIATEK_MDIO_MT7620) += mdio_mt7620.o - -mtk-eth-soc-$(CONFIG_NET_MEDIATEK_MT7621) += soc_mt7621.o - -obj-$(CONFIG_NET_MEDIATEK_GSW_MT7621) += gsw_mt7621.o - -obj-$(CONFIG_NET_MEDIATEK_SOC_STAGING) += mtk-eth-soc.o diff --git a/drivers/staging/mt7621-eth/TODO b/drivers/staging/mt7621-eth/TODO deleted file mode 100644 index f9e47d4b4cd4..000000000000 --- a/drivers/staging/mt7621-eth/TODO +++ /dev/null @@ -1,13 +0,0 @@ - -- verify devicetree documentation is consistent with code -- fix ethtool - currently doesn't return valid data. -- general code review and clean up -- add support for second MAC on mt7621 -- convert gsw code to use switchdev interfaces -- md7620_mmi_write etc should probably be wrapped - in a regmap abstraction. -- Get soc_mt7621 to work with QDMA TX if possible. -- Ensure phys are correctly configured when a cable - is plugged in. - -Cc: NeilBrown diff --git a/drivers/staging/mt7621-eth/ethtool.c b/drivers/staging/mt7621-eth/ethtool.c deleted file mode 100644 index 8c4228e2c987..000000000000 --- a/drivers/staging/mt7621-eth/ethtool.c +++ /dev/null @@ -1,250 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Copyright (C) 2009-2016 John Crispin - * Copyright (C) 2009-2016 Felix Fietkau - * Copyright (C) 2013-2016 Michael Lee - */ - -#include "mtk_eth_soc.h" -#include "ethtool.h" - -struct mtk_stat { - char name[ETH_GSTRING_LEN]; - unsigned int idx; -}; - -#define MTK_HW_STAT(stat) { \ - .name = #stat, \ - .idx = offsetof(struct mtk_hw_stats, stat) / sizeof(u64) \ -} - -static const struct mtk_stat mtk_ethtool_hw_stats[] = { - MTK_HW_STAT(tx_bytes), - MTK_HW_STAT(tx_packets), - MTK_HW_STAT(tx_skip), - MTK_HW_STAT(tx_collisions), - MTK_HW_STAT(rx_bytes), - MTK_HW_STAT(rx_packets), - MTK_HW_STAT(rx_overflow), - MTK_HW_STAT(rx_fcs_errors), - MTK_HW_STAT(rx_short_errors), - MTK_HW_STAT(rx_long_errors), - MTK_HW_STAT(rx_checksum_errors), - MTK_HW_STAT(rx_flow_control_packets), -}; - -#define MTK_HW_STATS_LEN ARRAY_SIZE(mtk_ethtool_hw_stats) - -static int mtk_get_link_ksettings(struct net_device *dev, - struct ethtool_link_ksettings *cmd) -{ - struct mtk_mac *mac = netdev_priv(dev); - int err; - - if (!mac->phy_dev) - return -ENODEV; - - if (mac->phy_flags == MTK_PHY_FLAG_ATTACH) { - err = phy_read_status(mac->phy_dev); - if (err) - return -ENODEV; - } - - phy_ethtool_ksettings_get(mac->phy_dev, cmd); - return 0; -} - -static int mtk_set_link_ksettings(struct net_device *dev, - const struct ethtool_link_ksettings *cmd) -{ - struct mtk_mac *mac = netdev_priv(dev); - - if (!mac->phy_dev) - return -ENODEV; - - if (cmd->base.phy_address != mac->phy_dev->mdio.addr) { - if (mac->hw->phy->phy_node[cmd->base.phy_address]) { - mac->phy_dev = mac->hw->phy->phy[cmd->base.phy_address]; - mac->phy_flags = MTK_PHY_FLAG_PORT; - } else if (mac->hw->mii_bus) { - mac->phy_dev = mdiobus_get_phy(mac->hw->mii_bus, - cmd->base.phy_address); - if (!mac->phy_dev) - return -ENODEV; - mac->phy_flags = MTK_PHY_FLAG_ATTACH; - } else { - return -ENODEV; - } - } - - return phy_ethtool_ksettings_set(mac->phy_dev, cmd); -} - -static void mtk_get_drvinfo(struct net_device *dev, - struct ethtool_drvinfo *info) -{ - struct mtk_mac *mac = netdev_priv(dev); - struct mtk_soc_data *soc = mac->hw->soc; - - strlcpy(info->driver, mac->hw->dev->driver->name, sizeof(info->driver)); - strlcpy(info->bus_info, dev_name(mac->hw->dev), sizeof(info->bus_info)); - - if (soc->reg_table[MTK_REG_MTK_COUNTER_BASE]) - info->n_stats = MTK_HW_STATS_LEN; -} - -static u32 mtk_get_msglevel(struct net_device *dev) -{ - struct mtk_mac *mac = netdev_priv(dev); - - return mac->hw->msg_enable; -} - -static void mtk_set_msglevel(struct net_device *dev, u32 value) -{ - struct mtk_mac *mac = netdev_priv(dev); - - mac->hw->msg_enable = value; -} - -static int mtk_nway_reset(struct net_device *dev) -{ - struct mtk_mac *mac = netdev_priv(dev); - - if (!mac->phy_dev) - return -EOPNOTSUPP; - - return genphy_restart_aneg(mac->phy_dev); -} - -static u32 mtk_get_link(struct net_device *dev) -{ - struct mtk_mac *mac = netdev_priv(dev); - int err; - - if (!mac->phy_dev) - goto out_get_link; - - if (mac->phy_flags == MTK_PHY_FLAG_ATTACH) { - err = genphy_update_link(mac->phy_dev); - if (err) - goto out_get_link; - } - - return mac->phy_dev->link; - -out_get_link: - return ethtool_op_get_link(dev); -} - -static int mtk_set_ringparam(struct net_device *dev, - struct ethtool_ringparam *ring) -{ - struct mtk_mac *mac = netdev_priv(dev); - - if ((ring->tx_pending < 2) || - (ring->rx_pending < 2) || - (ring->rx_pending > mac->hw->soc->dma_ring_size) || - (ring->tx_pending > mac->hw->soc->dma_ring_size)) - return -EINVAL; - - dev->netdev_ops->ndo_stop(dev); - - mac->hw->tx_ring.tx_ring_size = BIT(fls(ring->tx_pending) - 1); - mac->hw->rx_ring[0].rx_ring_size = BIT(fls(ring->rx_pending) - 1); - - return dev->netdev_ops->ndo_open(dev); -} - -static void mtk_get_ringparam(struct net_device *dev, - struct ethtool_ringparam *ring) -{ - struct mtk_mac *mac = netdev_priv(dev); - - ring->rx_max_pending = mac->hw->soc->dma_ring_size; - ring->tx_max_pending = mac->hw->soc->dma_ring_size; - ring->rx_pending = mac->hw->rx_ring[0].rx_ring_size; - ring->tx_pending = mac->hw->tx_ring.tx_ring_size; -} - -static void mtk_get_strings(struct net_device *dev, u32 stringset, u8 *data) -{ - int i; - - switch (stringset) { - case ETH_SS_STATS: - for (i = 0; i < MTK_HW_STATS_LEN; i++) { - memcpy(data, mtk_ethtool_hw_stats[i].name, - ETH_GSTRING_LEN); - data += ETH_GSTRING_LEN; - } - break; - } -} - -static int mtk_get_sset_count(struct net_device *dev, int sset) -{ - switch (sset) { - case ETH_SS_STATS: - return MTK_HW_STATS_LEN; - default: - return -EOPNOTSUPP; - } -} - -static void mtk_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *stats, u64 *data) -{ - struct mtk_mac *mac = netdev_priv(dev); - struct mtk_hw_stats *hwstats = mac->hw_stats; - unsigned int start; - int i; - - if (netif_running(dev) && netif_device_present(dev)) { - if (spin_trylock(&hwstats->stats_lock)) { - mtk_stats_update_mac(mac); - spin_unlock(&hwstats->stats_lock); - } - } - - do { - start = u64_stats_fetch_begin_irq(&hwstats->syncp); - for (i = 0; i < MTK_HW_STATS_LEN; i++) - data[i] = ((u64 *)hwstats)[mtk_ethtool_hw_stats[i].idx]; - - } while (u64_stats_fetch_retry_irq(&hwstats->syncp, start)); -} - -static struct ethtool_ops mtk_ethtool_ops = { - .get_link_ksettings = mtk_get_link_ksettings, - .set_link_ksettings = mtk_set_link_ksettings, - .get_drvinfo = mtk_get_drvinfo, - .get_msglevel = mtk_get_msglevel, - .set_msglevel = mtk_set_msglevel, - .nway_reset = mtk_nway_reset, - .get_link = mtk_get_link, - .set_ringparam = mtk_set_ringparam, - .get_ringparam = mtk_get_ringparam, -}; - -void mtk_set_ethtool_ops(struct net_device *netdev) -{ - struct mtk_mac *mac = netdev_priv(netdev); - struct mtk_soc_data *soc = mac->hw->soc; - - if (soc->reg_table[MTK_REG_MTK_COUNTER_BASE]) { - mtk_ethtool_ops.get_strings = mtk_get_strings; - mtk_ethtool_ops.get_sset_count = mtk_get_sset_count; - mtk_ethtool_ops.get_ethtool_stats = mtk_get_ethtool_stats; - } - - netdev->ethtool_ops = &mtk_ethtool_ops; -} diff --git a/drivers/staging/mt7621-eth/ethtool.h b/drivers/staging/mt7621-eth/ethtool.h deleted file mode 100644 index 0071469aea6c..000000000000 --- a/drivers/staging/mt7621-eth/ethtool.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2009-2016 John Crispin - * Copyright (C) 2009-2016 Felix Fietkau - * Copyright (C) 2013-2016 Michael Lee - */ - -#ifndef MTK_ETHTOOL_H -#define MTK_ETHTOOL_H - -#include - -void mtk_set_ethtool_ops(struct net_device *netdev); - -#endif /* MTK_ETHTOOL_H */ diff --git a/drivers/staging/mt7621-eth/gsw_mt7620.h b/drivers/staging/mt7621-eth/gsw_mt7620.h deleted file mode 100644 index 70f7e5481952..000000000000 --- a/drivers/staging/mt7621-eth/gsw_mt7620.h +++ /dev/null @@ -1,277 +0,0 @@ -/* This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Copyright (C) 2009-2016 John Crispin - * Copyright (C) 2009-2016 Felix Fietkau - * Copyright (C) 2013-2016 Michael Lee - */ - -#ifndef _RALINK_GSW_MT7620_H__ -#define _RALINK_GSW_MT7620_H__ - -#define GSW_REG_PHY_TIMEOUT (5 * HZ) - -#define MT7620_GSW_REG_PIAC 0x0004 - -#define GSW_NUM_VLANS 16 -#define GSW_NUM_VIDS 4096 -#define GSW_NUM_PORTS 7 -#define GSW_PORT6 6 - -#define GSW_MDIO_ACCESS BIT(31) -#define GSW_MDIO_READ BIT(19) -#define GSW_MDIO_WRITE BIT(18) -#define GSW_MDIO_START BIT(16) -#define GSW_MDIO_ADDR_SHIFT 20 -#define GSW_MDIO_REG_SHIFT 25 - -#define GSW_REG_PORT_PMCR(x) (0x3000 + (x * 0x100)) -#define GSW_REG_PORT_STATUS(x) (0x3008 + (x * 0x100)) -#define GSW_REG_SMACCR0 0x3fE4 -#define GSW_REG_SMACCR1 0x3fE8 -#define GSW_REG_CKGCR 0x3ff0 - -#define GSW_REG_IMR 0x7008 -#define GSW_REG_ISR 0x700c -#define GSW_REG_GPC1 0x7014 - -#define SYSC_REG_CHIP_REV_ID 0x0c -#define SYSC_REG_CFG 0x10 -#define SYSC_REG_CFG1 0x14 -#define RST_CTRL_MCM BIT(2) -#define SYSC_PAD_RGMII2_MDIO 0x58 -#define SYSC_GPIO_MODE 0x60 - -#define PORT_IRQ_ST_CHG 0x7f - -#define MT7621_ESW_PHY_POLLING 0x0000 -#define MT7620_ESW_PHY_POLLING 0x7000 - -#define PMCR_IPG BIT(18) -#define PMCR_MAC_MODE BIT(16) -#define PMCR_FORCE BIT(15) -#define PMCR_TX_EN BIT(14) -#define PMCR_RX_EN BIT(13) -#define PMCR_BACKOFF BIT(9) -#define PMCR_BACKPRES BIT(8) -#define PMCR_RX_FC BIT(5) -#define PMCR_TX_FC BIT(4) -#define PMCR_SPEED(_x) (_x << 2) -#define PMCR_DUPLEX BIT(1) -#define PMCR_LINK BIT(0) - -#define PHY_AN_EN BIT(31) -#define PHY_PRE_EN BIT(30) -#define PMY_MDC_CONF(_x) ((_x & 0x3f) << 24) - -/* ethernet subsystem config register */ -#define ETHSYS_SYSCFG0 0x14 -/* ethernet subsystem clock register */ -#define ETHSYS_CLKCFG0 0x2c -#define ETHSYS_TRGMII_CLK_SEL362_5 BIT(11) - -/* p5 RGMII wrapper TX clock control register */ -#define MT7530_P5RGMIITXCR 0x7b04 -/* p5 RGMII wrapper RX clock control register */ -#define MT7530_P5RGMIIRXCR 0x7b00 -/* TRGMII TDX ODT registers */ -#define MT7530_TRGMII_TD0_ODT 0x7a54 -#define MT7530_TRGMII_TD1_ODT 0x7a5c -#define MT7530_TRGMII_TD2_ODT 0x7a64 -#define MT7530_TRGMII_TD3_ODT 0x7a6c -#define MT7530_TRGMII_TD4_ODT 0x7a74 -#define MT7530_TRGMII_TD5_ODT 0x7a7c -/* TRGMII TCK ctrl register */ -#define MT7530_TRGMII_TCK_CTRL 0x7a78 -/* TRGMII Tx ctrl register */ -#define MT7530_TRGMII_TXCTRL 0x7a40 -/* port 6 extended control register */ -#define MT7530_P6ECR 0x7830 -/* IO driver control register */ -#define MT7530_IO_DRV_CR 0x7810 -/* top signal control register */ -#define MT7530_TOP_SIG_CTRL 0x7808 -/* modified hwtrap register */ -#define MT7530_MHWTRAP 0x7804 -/* hwtrap status register */ -#define MT7530_HWTRAP 0x7800 -/* status interrupt register */ -#define MT7530_SYS_INT_STS 0x700c -/* system nterrupt register */ -#define MT7530_SYS_INT_EN 0x7008 -/* system control register */ -#define MT7530_SYS_CTRL 0x7000 -/* port MAC status register */ -#define MT7530_PMSR_P(x) (0x3008 + (x * 0x100)) -/* port MAC control register */ -#define MT7530_PMCR_P(x) (0x3000 + (x * 0x100)) - -#define MT7621_XTAL_SHIFT 6 -#define MT7621_XTAL_MASK 0x7 -#define MT7621_XTAL_25 6 -#define MT7621_XTAL_40 3 -#define MT7621_MDIO_DRV_MASK (3 << 4) -#define MT7621_GE1_MODE_MASK (3 << 12) - -#define TRGMII_TXCTRL_TXC_INV BIT(30) -#define P6ECR_INTF_MODE_RGMII BIT(1) -#define P5RGMIIRXCR_C_ALIGN BIT(8) -#define P5RGMIIRXCR_DELAY_2 BIT(1) -#define P5RGMIITXCR_DELAY_2 (BIT(8) | BIT(2)) - -/* TOP_SIG_CTRL bits */ -#define TOP_SIG_CTRL_NORMAL (BIT(17) | BIT(16)) - -/* MHWTRAP bits */ -#define MHWTRAP_MANUAL BIT(16) -#define MHWTRAP_P5_MAC_SEL BIT(13) -#define MHWTRAP_P6_DIS BIT(8) -#define MHWTRAP_P5_RGMII_MODE BIT(7) -#define MHWTRAP_P5_DIS BIT(6) -#define MHWTRAP_PHY_ACCESS BIT(5) - -/* HWTRAP bits */ -#define HWTRAP_XTAL_SHIFT 9 -#define HWTRAP_XTAL_MASK 0x3 - -/* SYS_CTRL bits */ -#define SYS_CTRL_SW_RST BIT(1) -#define SYS_CTRL_REG_RST BIT(0) - -/* PMCR bits */ -#define PMCR_IFG_XMIT_96 BIT(18) -#define PMCR_MAC_MODE BIT(16) -#define PMCR_FORCE_MODE BIT(15) -#define PMCR_TX_EN BIT(14) -#define PMCR_RX_EN BIT(13) -#define PMCR_BACK_PRES_EN BIT(9) -#define PMCR_BACKOFF_EN BIT(8) -#define PMCR_TX_FC_EN BIT(5) -#define PMCR_RX_FC_EN BIT(4) -#define PMCR_FORCE_SPEED_1000 BIT(3) -#define PMCR_FORCE_FDX BIT(1) -#define PMCR_FORCE_LNK BIT(0) -#define PMCR_FIXED_LINK (PMCR_IFG_XMIT_96 | PMCR_MAC_MODE | \ - PMCR_FORCE_MODE | PMCR_TX_EN | PMCR_RX_EN | \ - PMCR_BACK_PRES_EN | PMCR_BACKOFF_EN | \ - PMCR_FORCE_SPEED_1000 | PMCR_FORCE_FDX | \ - PMCR_FORCE_LNK) - -#define PMCR_FIXED_LINK_FC (PMCR_FIXED_LINK | \ - PMCR_TX_FC_EN | PMCR_RX_FC_EN) - -/* TRGMII control registers */ -#define GSW_INTF_MODE 0x390 -#define GSW_TRGMII_TD0_ODT 0x354 -#define GSW_TRGMII_TD1_ODT 0x35c -#define GSW_TRGMII_TD2_ODT 0x364 -#define GSW_TRGMII_TD3_ODT 0x36c -#define GSW_TRGMII_TXCTL_ODT 0x374 -#define GSW_TRGMII_TCK_ODT 0x37c -#define GSW_TRGMII_RCK_CTRL 0x300 - -#define INTF_MODE_TRGMII BIT(1) -#define TRGMII_RCK_CTRL_RX_RST BIT(31) - -/* Mac control registers */ -#define MTK_MAC_P2_MCR 0x200 -#define MTK_MAC_P1_MCR 0x100 - -#define MAC_MCR_MAX_RX_2K BIT(29) -#define MAC_MCR_IPG_CFG (BIT(18) | BIT(16)) -#define MAC_MCR_FORCE_MODE BIT(15) -#define MAC_MCR_TX_EN BIT(14) -#define MAC_MCR_RX_EN BIT(13) -#define MAC_MCR_BACKOFF_EN BIT(9) -#define MAC_MCR_BACKPR_EN BIT(8) -#define MAC_MCR_FORCE_RX_FC BIT(5) -#define MAC_MCR_FORCE_TX_FC BIT(4) -#define MAC_MCR_SPEED_1000 BIT(3) -#define MAC_MCR_FORCE_DPX BIT(1) -#define MAC_MCR_FORCE_LINK BIT(0) -#define MAC_MCR_FIXED_LINK (MAC_MCR_MAX_RX_2K | MAC_MCR_IPG_CFG | \ - MAC_MCR_FORCE_MODE | MAC_MCR_TX_EN | \ - MAC_MCR_RX_EN | MAC_MCR_BACKOFF_EN | \ - MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_RX_FC | \ - MAC_MCR_FORCE_TX_FC | MAC_MCR_SPEED_1000 | \ - MAC_MCR_FORCE_DPX | MAC_MCR_FORCE_LINK) -#define MAC_MCR_FIXED_LINK_FC (MAC_MCR_MAX_RX_2K | MAC_MCR_IPG_CFG | \ - MAC_MCR_FIXED_LINK) - -/* possible XTAL speed */ -#define MT7623_XTAL_40 0 -#define MT7623_XTAL_20 1 -#define MT7623_XTAL_25 3 - -/* GPIO port control registers */ -#define GPIO_OD33_CTRL8 0x4c0 -#define GPIO_BIAS_CTRL 0xed0 -#define GPIO_DRV_SEL10 0xf00 - -/* on MT7620 the functio of port 4 can be software configured */ -enum { - PORT4_EPHY = 0, - PORT4_EXT, -}; - -/* struct mt7620_gsw - the structure that holds the SoC specific data - * @dev: The Device struct - * @base: The base address - * @piac_offset: The PIAC base may change depending on SoC - * @irq: The IRQ we are using - * @port4: The port4 mode on MT7620 - * @autopoll: Is MDIO autopolling enabled - * @ethsys: The ethsys register map - * @pctl: The pin control register map - * @clk_gsw: The switch clock - * @clk_gp1: The gmac1 clock - * @clk_gp2: The gmac2 clock - * @clk_trgpll: The trgmii pll clock - */ -struct mt7620_gsw { - struct device *dev; - void __iomem *base; - u32 piac_offset; - int irq; - int port4; - unsigned long int autopoll; - - struct regmap *ethsys; - struct regmap *pctl; - - struct clk *clk_gsw; - struct clk *clk_gp1; - struct clk *clk_gp2; - struct clk *clk_trgpll; -}; - -/* switch register I/O wrappers */ -void mtk_switch_w32(struct mt7620_gsw *gsw, u32 val, unsigned int reg); -u32 mtk_switch_r32(struct mt7620_gsw *gsw, unsigned int reg); - -/* the callback used by the driver core to bringup the switch */ -int mtk_gsw_init(struct mtk_eth *eth); - -/* MDIO access wrappers */ -int mt7620_mdio_write(struct mii_bus *bus, int phy_addr, int phy_reg, u16 val); -int mt7620_mdio_read(struct mii_bus *bus, int phy_addr, int phy_reg); -void mt7620_mdio_link_adjust(struct mtk_eth *eth, int port); -int mt7620_has_carrier(struct mtk_eth *eth); -void mt7620_print_link_state(struct mtk_eth *eth, int port, int link, - int speed, int duplex); -void mt7530_mdio_w32(struct mt7620_gsw *gsw, u32 reg, u32 val); -u32 mt7530_mdio_r32(struct mt7620_gsw *gsw, u32 reg); -void mt7530_mdio_m32(struct mt7620_gsw *gsw, u32 mask, u32 set, u32 reg); - -u32 _mt7620_mii_write(struct mt7620_gsw *gsw, u32 phy_addr, - u32 phy_register, u32 write_data); -u32 _mt7620_mii_read(struct mt7620_gsw *gsw, int phy_addr, int phy_reg); -void mt7620_handle_carrier(struct mtk_eth *eth); - -#endif diff --git a/drivers/staging/mt7621-eth/gsw_mt7621.c b/drivers/staging/mt7621-eth/gsw_mt7621.c deleted file mode 100644 index 53767b17bad9..000000000000 --- a/drivers/staging/mt7621-eth/gsw_mt7621.c +++ /dev/null @@ -1,297 +0,0 @@ -/* This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Copyright (C) 2009-2016 John Crispin - * Copyright (C) 2009-2016 Felix Fietkau - * Copyright (C) 2013-2016 Michael Lee - */ - -#include -#include -#include -#include -#include -#include - -#include - -#include "mtk_eth_soc.h" -#include "gsw_mt7620.h" - -void mtk_switch_w32(struct mt7620_gsw *gsw, u32 val, unsigned int reg) -{ - iowrite32(val, gsw->base + reg); -} -EXPORT_SYMBOL_GPL(mtk_switch_w32); - -u32 mtk_switch_r32(struct mt7620_gsw *gsw, unsigned int reg) -{ - return ioread32(gsw->base + reg); -} -EXPORT_SYMBOL_GPL(mtk_switch_r32); - -static irqreturn_t gsw_interrupt_mt7621(int irq, void *_eth) -{ - struct mtk_eth *eth = (struct mtk_eth *)_eth; - struct mt7620_gsw *gsw = (struct mt7620_gsw *)eth->sw_priv; - u32 reg, i; - - reg = mt7530_mdio_r32(gsw, MT7530_SYS_INT_STS); - - for (i = 0; i < 5; i++) { - unsigned int link; - - if ((reg & BIT(i)) == 0) - continue; - - link = mt7530_mdio_r32(gsw, MT7530_PMSR_P(i)) & 0x1; - - if (link == eth->link[i]) - continue; - - eth->link[i] = link; - if (link) - netdev_info(*eth->netdev, - "port %d link up\n", i); - else - netdev_info(*eth->netdev, - "port %d link down\n", i); - } - - mt7530_mdio_w32(gsw, MT7530_SYS_INT_STS, 0x1f); - - return IRQ_HANDLED; -} - -static void mt7621_hw_init(struct mtk_eth *eth, struct mt7620_gsw *gsw, - struct device_node *np) -{ - u32 i; - u32 val; - - /* hardware reset the switch */ - mtk_reset(eth, RST_CTRL_MCM); - mdelay(10); - - /* reduce RGMII2 PAD driving strength */ - rt_sysc_m32(MT7621_MDIO_DRV_MASK, 0, SYSC_PAD_RGMII2_MDIO); - - /* gpio mux - RGMII1=Normal mode */ - rt_sysc_m32(BIT(14), 0, SYSC_GPIO_MODE); - - /* set GMAC1 RGMII mode */ - rt_sysc_m32(MT7621_GE1_MODE_MASK, 0, SYSC_REG_CFG1); - - /* enable MDIO to control MT7530 */ - rt_sysc_m32(3 << 12, 0, SYSC_GPIO_MODE); - - /* turn off all PHYs */ - for (i = 0; i <= 4; i++) { - val = _mt7620_mii_read(gsw, i, 0x0); - val |= BIT(11); - _mt7620_mii_write(gsw, i, 0x0, val); - } - - /* reset the switch */ - mt7530_mdio_w32(gsw, MT7530_SYS_CTRL, - SYS_CTRL_SW_RST | SYS_CTRL_REG_RST); - usleep_range(10, 20); - - if ((rt_sysc_r32(SYSC_REG_CHIP_REV_ID) & 0xFFFF) == 0x0101) { - /* GE1, Force 1000M/FD, FC ON, MAX_RX_LENGTH 1536 */ - mtk_switch_w32(gsw, MAC_MCR_FIXED_LINK, MTK_MAC_P2_MCR); - mt7530_mdio_w32(gsw, MT7530_PMCR_P(6), PMCR_FIXED_LINK); - } else { - /* GE1, Force 1000M/FD, FC ON, MAX_RX_LENGTH 1536 */ - mtk_switch_w32(gsw, MAC_MCR_FIXED_LINK_FC, MTK_MAC_P1_MCR); - mt7530_mdio_w32(gsw, MT7530_PMCR_P(6), PMCR_FIXED_LINK_FC); - } - - /* GE2, Link down */ - mtk_switch_w32(gsw, MAC_MCR_FORCE_MODE, MTK_MAC_P2_MCR); - - /* Enable Port 6, P5 as GMAC5, P5 disable */ - val = mt7530_mdio_r32(gsw, MT7530_MHWTRAP); - /* Enable Port 6 */ - val &= ~MHWTRAP_P6_DIS; - /* Disable Port 5 */ - val |= MHWTRAP_P5_DIS; - /* manual override of HW-Trap */ - val |= MHWTRAP_MANUAL; - mt7530_mdio_w32(gsw, MT7530_MHWTRAP, val); - - val = rt_sysc_r32(SYSC_REG_CFG); - val = (val >> MT7621_XTAL_SHIFT) & MT7621_XTAL_MASK; - if (val < MT7621_XTAL_25 && val >= MT7621_XTAL_40) { - /* 40Mhz */ - - /* disable MT7530 core clock */ - _mt7620_mii_write(gsw, 0, 13, 0x1f); - _mt7620_mii_write(gsw, 0, 14, 0x410); - _mt7620_mii_write(gsw, 0, 13, 0x401f); - _mt7620_mii_write(gsw, 0, 14, 0x0); - - /* disable MT7530 PLL */ - _mt7620_mii_write(gsw, 0, 13, 0x1f); - _mt7620_mii_write(gsw, 0, 14, 0x40d); - _mt7620_mii_write(gsw, 0, 13, 0x401f); - _mt7620_mii_write(gsw, 0, 14, 0x2020); - - /* for MT7530 core clock = 500Mhz */ - _mt7620_mii_write(gsw, 0, 13, 0x1f); - _mt7620_mii_write(gsw, 0, 14, 0x40e); - _mt7620_mii_write(gsw, 0, 13, 0x401f); - _mt7620_mii_write(gsw, 0, 14, 0x119); - - /* enable MT7530 PLL */ - _mt7620_mii_write(gsw, 0, 13, 0x1f); - _mt7620_mii_write(gsw, 0, 14, 0x40d); - _mt7620_mii_write(gsw, 0, 13, 0x401f); - _mt7620_mii_write(gsw, 0, 14, 0x2820); - - usleep_range(20, 40); - - /* enable MT7530 core clock */ - _mt7620_mii_write(gsw, 0, 13, 0x1f); - _mt7620_mii_write(gsw, 0, 14, 0x410); - _mt7620_mii_write(gsw, 0, 13, 0x401f); - } - - /* RGMII */ - _mt7620_mii_write(gsw, 0, 14, 0x1); - - /* set MT7530 central align */ - mt7530_mdio_m32(gsw, BIT(0), P6ECR_INTF_MODE_RGMII, MT7530_P6ECR); - mt7530_mdio_m32(gsw, TRGMII_TXCTRL_TXC_INV, 0, - MT7530_TRGMII_TXCTRL); - mt7530_mdio_w32(gsw, MT7530_TRGMII_TCK_CTRL, 0x855); - - /* delay setting for 10/1000M */ - mt7530_mdio_w32(gsw, MT7530_P5RGMIIRXCR, - P5RGMIIRXCR_C_ALIGN | P5RGMIIRXCR_DELAY_2); - mt7530_mdio_w32(gsw, MT7530_P5RGMIITXCR, 0x14); - - /* lower Tx Driving*/ - mt7530_mdio_w32(gsw, MT7530_TRGMII_TD0_ODT, 0x44); - mt7530_mdio_w32(gsw, MT7530_TRGMII_TD1_ODT, 0x44); - mt7530_mdio_w32(gsw, MT7530_TRGMII_TD2_ODT, 0x44); - mt7530_mdio_w32(gsw, MT7530_TRGMII_TD3_ODT, 0x44); - mt7530_mdio_w32(gsw, MT7530_TRGMII_TD4_ODT, 0x44); - mt7530_mdio_w32(gsw, MT7530_TRGMII_TD5_ODT, 0x44); - - /* turn on all PHYs */ - for (i = 0; i <= 4; i++) { - val = _mt7620_mii_read(gsw, i, 0); - val &= ~BIT(11); - _mt7620_mii_write(gsw, i, 0, val); - } - -#define MT7530_NUM_PORTS 8 -#define REG_ESW_PORT_PCR(x) (0x2004 | ((x) << 8)) -#define REG_ESW_PORT_PVC(x) (0x2010 | ((x) << 8)) -#define REG_ESW_PORT_PPBV1(x) (0x2014 | ((x) << 8)) -#define MT7530_CPU_PORT 6 - - /* This is copied from mt7530_apply_config in libreCMC driver */ - { - int i; - - for (i = 0; i < MT7530_NUM_PORTS; i++) - mt7530_mdio_w32(gsw, REG_ESW_PORT_PCR(i), 0x00400000); - - mt7530_mdio_w32(gsw, REG_ESW_PORT_PCR(MT7530_CPU_PORT), - 0x00ff0000); - - for (i = 0; i < MT7530_NUM_PORTS; i++) - mt7530_mdio_w32(gsw, REG_ESW_PORT_PVC(i), 0x810000c0); - } - - /* enable irq */ - mt7530_mdio_m32(gsw, 0, 3 << 16, MT7530_TOP_SIG_CTRL); - mt7530_mdio_w32(gsw, MT7530_SYS_INT_EN, 0x1f); -} - -static const struct of_device_id mediatek_gsw_match[] = { - { .compatible = "mediatek,mt7621-gsw" }, - {}, -}; -MODULE_DEVICE_TABLE(of, mediatek_gsw_match); - -int mtk_gsw_init(struct mtk_eth *eth) -{ - struct device_node *np = eth->switch_np; - struct platform_device *pdev = of_find_device_by_node(np); - struct mt7620_gsw *gsw; - - if (!pdev) - return -ENODEV; - - if (!of_device_is_compatible(np, mediatek_gsw_match->compatible)) - return -EINVAL; - - gsw = platform_get_drvdata(pdev); - eth->sw_priv = gsw; - - if (!gsw->irq) - return -EINVAL; - - request_irq(gsw->irq, gsw_interrupt_mt7621, 0, - "gsw", eth); - disable_irq(gsw->irq); - - mt7621_hw_init(eth, gsw, np); - - enable_irq(gsw->irq); - - return 0; -} -EXPORT_SYMBOL_GPL(mtk_gsw_init); - -static int mt7621_gsw_probe(struct platform_device *pdev) -{ - struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - struct mt7620_gsw *gsw; - - gsw = devm_kzalloc(&pdev->dev, sizeof(struct mt7620_gsw), GFP_KERNEL); - if (!gsw) - return -ENOMEM; - - gsw->base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(gsw->base)) - return PTR_ERR(gsw->base); - - gsw->dev = &pdev->dev; - gsw->irq = irq_of_parse_and_map(pdev->dev.of_node, 0); - - platform_set_drvdata(pdev, gsw); - - return 0; -} - -static int mt7621_gsw_remove(struct platform_device *pdev) -{ - platform_set_drvdata(pdev, NULL); - - return 0; -} - -static struct platform_driver gsw_driver = { - .probe = mt7621_gsw_probe, - .remove = mt7621_gsw_remove, - .driver = { - .name = "mt7621-gsw", - .of_match_table = mediatek_gsw_match, - }, -}; - -module_platform_driver(gsw_driver); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("John Crispin "); -MODULE_DESCRIPTION("GBit switch driver for Mediatek MT7621 SoC"); diff --git a/drivers/staging/mt7621-eth/mdio.c b/drivers/staging/mt7621-eth/mdio.c deleted file mode 100644 index 5fea6a447eed..000000000000 --- a/drivers/staging/mt7621-eth/mdio.c +++ /dev/null @@ -1,275 +0,0 @@ -/* This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License - * - * Copyright (C) 2009-2016 John Crispin - * Copyright (C) 2009-2016 Felix Fietkau - * Copyright (C) 2013-2016 Michael Lee - */ - -#include -#include -#include -#include -#include - -#include "mtk_eth_soc.h" -#include "mdio.h" - -static int mtk_mdio_reset(struct mii_bus *bus) -{ - /* TODO */ - return 0; -} - -static void mtk_phy_link_adjust(struct net_device *dev) -{ - struct mtk_eth *eth = netdev_priv(dev); - unsigned long flags; - int i; - - spin_lock_irqsave(ð->phy->lock, flags); - for (i = 0; i < 8; i++) { - if (eth->phy->phy_node[i]) { - struct phy_device *phydev = eth->phy->phy[i]; - int status_change = 0; - - if (phydev->link) - if (eth->phy->duplex[i] != phydev->duplex || - eth->phy->speed[i] != phydev->speed) - status_change = 1; - - if (phydev->link != eth->link[i]) - status_change = 1; - - switch (phydev->speed) { - case SPEED_1000: - case SPEED_100: - case SPEED_10: - eth->link[i] = phydev->link; - eth->phy->duplex[i] = phydev->duplex; - eth->phy->speed[i] = phydev->speed; - - if (status_change && - eth->soc->mdio_adjust_link) - eth->soc->mdio_adjust_link(eth, i); - break; - } - } - } - spin_unlock_irqrestore(ð->phy->lock, flags); -} - -int mtk_connect_phy_node(struct mtk_eth *eth, struct mtk_mac *mac, - struct device_node *phy_node) -{ - const __be32 *_port = NULL; - struct phy_device *phydev; - int phy_mode, port; - - _port = of_get_property(phy_node, "reg", NULL); - - if (!_port || (be32_to_cpu(*_port) >= 0x20)) { - pr_err("%pOFn: invalid port id\n", phy_node); - return -EINVAL; - } - port = be32_to_cpu(*_port); - phy_mode = of_get_phy_mode(phy_node); - if (phy_mode < 0) { - dev_err(eth->dev, "incorrect phy-mode %d\n", phy_mode); - eth->phy->phy_node[port] = NULL; - return -EINVAL; - } - - phydev = of_phy_connect(eth->netdev[mac->id], phy_node, - mtk_phy_link_adjust, 0, phy_mode); - if (!phydev) { - dev_err(eth->dev, "could not connect to PHY\n"); - eth->phy->phy_node[port] = NULL; - return -ENODEV; - } - - phydev->supported &= PHY_1000BT_FEATURES; - phydev->advertising = phydev->supported; - - dev_info(eth->dev, - "connected port %d to PHY at %s [uid=%08x, driver=%s]\n", - port, phydev_name(phydev), phydev->phy_id, - phydev->drv->name); - - eth->phy->phy[port] = phydev; - eth->link[port] = 0; - - return 0; -} - -static void phy_init(struct mtk_eth *eth, struct mtk_mac *mac, - struct phy_device *phy) -{ - phy_attach(eth->netdev[mac->id], phydev_name(phy), - PHY_INTERFACE_MODE_MII); - - phy->autoneg = AUTONEG_ENABLE; - phy->speed = 0; - phy->duplex = 0; - phy_set_max_speed(phy, SPEED_100); - phy->advertising = phy->supported | ADVERTISED_Autoneg; - - phy_start_aneg(phy); -} - -static int mtk_phy_connect(struct mtk_mac *mac) -{ - struct mtk_eth *eth = mac->hw; - int i; - - for (i = 0; i < 8; i++) { - if (eth->phy->phy_node[i]) { - if (!mac->phy_dev) { - mac->phy_dev = eth->phy->phy[i]; - mac->phy_flags = MTK_PHY_FLAG_PORT; - } - } else if (eth->mii_bus) { - struct phy_device *phy; - - phy = mdiobus_get_phy(eth->mii_bus, i); - if (phy) { - phy_init(eth, mac, phy); - if (!mac->phy_dev) { - mac->phy_dev = phy; - mac->phy_flags = MTK_PHY_FLAG_ATTACH; - } - } - } - } - - return 0; -} - -static void mtk_phy_disconnect(struct mtk_mac *mac) -{ - struct mtk_eth *eth = mac->hw; - unsigned long flags; - int i; - - for (i = 0; i < 8; i++) - if (eth->phy->phy_fixed[i]) { - spin_lock_irqsave(ð->phy->lock, flags); - eth->link[i] = 0; - if (eth->soc->mdio_adjust_link) - eth->soc->mdio_adjust_link(eth, i); - spin_unlock_irqrestore(ð->phy->lock, flags); - } else if (eth->phy->phy[i]) { - phy_disconnect(eth->phy->phy[i]); - } else if (eth->mii_bus) { - struct phy_device *phy = - mdiobus_get_phy(eth->mii_bus, i); - - if (phy) - phy_detach(phy); - } -} - -static void mtk_phy_start(struct mtk_mac *mac) -{ - struct mtk_eth *eth = mac->hw; - unsigned long flags; - int i; - - for (i = 0; i < 8; i++) { - if (eth->phy->phy_fixed[i]) { - spin_lock_irqsave(ð->phy->lock, flags); - eth->link[i] = 1; - if (eth->soc->mdio_adjust_link) - eth->soc->mdio_adjust_link(eth, i); - spin_unlock_irqrestore(ð->phy->lock, flags); - } else if (eth->phy->phy[i]) { - phy_start(eth->phy->phy[i]); - } - } -} - -static void mtk_phy_stop(struct mtk_mac *mac) -{ - struct mtk_eth *eth = mac->hw; - unsigned long flags; - int i; - - for (i = 0; i < 8; i++) - if (eth->phy->phy_fixed[i]) { - spin_lock_irqsave(ð->phy->lock, flags); - eth->link[i] = 0; - if (eth->soc->mdio_adjust_link) - eth->soc->mdio_adjust_link(eth, i); - spin_unlock_irqrestore(ð->phy->lock, flags); - } else if (eth->phy->phy[i]) { - phy_stop(eth->phy->phy[i]); - } -} - -static struct mtk_phy phy_ralink = { - .connect = mtk_phy_connect, - .disconnect = mtk_phy_disconnect, - .start = mtk_phy_start, - .stop = mtk_phy_stop, -}; - -int mtk_mdio_init(struct mtk_eth *eth) -{ - struct device_node *mii_np; - int err; - - if (!eth->soc->mdio_read || !eth->soc->mdio_write) - return 0; - - spin_lock_init(&phy_ralink.lock); - eth->phy = &phy_ralink; - - mii_np = of_get_child_by_name(eth->dev->of_node, "mdio-bus"); - if (!mii_np) { - dev_err(eth->dev, "no %s child node found", "mdio-bus"); - return -ENODEV; - } - - if (!of_device_is_available(mii_np)) { - err = 0; - goto err_put_node; - } - - eth->mii_bus = mdiobus_alloc(); - if (!eth->mii_bus) { - err = -ENOMEM; - goto err_put_node; - } - - eth->mii_bus->name = "mdio"; - eth->mii_bus->read = eth->soc->mdio_read; - eth->mii_bus->write = eth->soc->mdio_write; - eth->mii_bus->reset = mtk_mdio_reset; - eth->mii_bus->priv = eth; - eth->mii_bus->parent = eth->dev; - - snprintf(eth->mii_bus->id, MII_BUS_ID_SIZE, "%pOFn", mii_np); - err = of_mdiobus_register(eth->mii_bus, mii_np); - if (err) - goto err_free_bus; - - return 0; - -err_free_bus: - kfree(eth->mii_bus); -err_put_node: - of_node_put(mii_np); - eth->mii_bus = NULL; - return err; -} - -void mtk_mdio_cleanup(struct mtk_eth *eth) -{ - if (!eth->mii_bus) - return; - - mdiobus_unregister(eth->mii_bus); - of_node_put(eth->mii_bus->dev.of_node); - kfree(eth->mii_bus); -} diff --git a/drivers/staging/mt7621-eth/mdio.h b/drivers/staging/mt7621-eth/mdio.h deleted file mode 100644 index b14e23842a01..000000000000 --- a/drivers/staging/mt7621-eth/mdio.h +++ /dev/null @@ -1,27 +0,0 @@ -/* This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Copyright (C) 2009-2016 John Crispin - * Copyright (C) 2009-2016 Felix Fietkau - * Copyright (C) 2013-2016 Michael Lee - */ - -#ifndef _RALINK_MDIO_H__ -#define _RALINK_MDIO_H__ - -#ifdef CONFIG_NET_MEDIATEK_MDIO -int mtk_mdio_init(struct mtk_eth *eth); -void mtk_mdio_cleanup(struct mtk_eth *eth); -int mtk_connect_phy_node(struct mtk_eth *eth, struct mtk_mac *mac, - struct device_node *phy_node); -#else -static inline int mtk_mdio_init(struct mtk_eth *eth) { return 0; } -static inline void mtk_mdio_cleanup(struct mtk_eth *eth) {} -#endif -#endif diff --git a/drivers/staging/mt7621-eth/mdio_mt7620.c b/drivers/staging/mt7621-eth/mdio_mt7620.c deleted file mode 100644 index ced605c2914e..000000000000 --- a/drivers/staging/mt7621-eth/mdio_mt7620.c +++ /dev/null @@ -1,173 +0,0 @@ -/* This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Copyright (C) 2009-2016 John Crispin - * Copyright (C) 2009-2016 Felix Fietkau - * Copyright (C) 2013-2016 Michael Lee - */ - -#include -#include -#include - -#include "mtk_eth_soc.h" -#include "gsw_mt7620.h" -#include "mdio.h" - -static int mt7620_mii_busy_wait(struct mt7620_gsw *gsw) -{ - unsigned long t_start = jiffies; - - while (1) { - if (!(mtk_switch_r32(gsw, - gsw->piac_offset + MT7620_GSW_REG_PIAC) & - GSW_MDIO_ACCESS)) - return 0; - if (time_after(jiffies, t_start + GSW_REG_PHY_TIMEOUT)) - break; - } - - dev_err(gsw->dev, "mdio: MDIO timeout\n"); - return -1; -} - -u32 _mt7620_mii_write(struct mt7620_gsw *gsw, u32 phy_addr, - u32 phy_register, u32 write_data) -{ - if (mt7620_mii_busy_wait(gsw)) - return -1; - - write_data &= 0xffff; - - mtk_switch_w32(gsw, GSW_MDIO_ACCESS | GSW_MDIO_START | GSW_MDIO_WRITE | - (phy_register << GSW_MDIO_REG_SHIFT) | - (phy_addr << GSW_MDIO_ADDR_SHIFT) | write_data, - MT7620_GSW_REG_PIAC); - - if (mt7620_mii_busy_wait(gsw)) - return -1; - - return 0; -} -EXPORT_SYMBOL_GPL(_mt7620_mii_write); - -u32 _mt7620_mii_read(struct mt7620_gsw *gsw, int phy_addr, int phy_reg) -{ - u32 d; - - if (mt7620_mii_busy_wait(gsw)) - return 0xffff; - - mtk_switch_w32(gsw, GSW_MDIO_ACCESS | GSW_MDIO_START | GSW_MDIO_READ | - (phy_reg << GSW_MDIO_REG_SHIFT) | - (phy_addr << GSW_MDIO_ADDR_SHIFT), - MT7620_GSW_REG_PIAC); - - if (mt7620_mii_busy_wait(gsw)) - return 0xffff; - - d = mtk_switch_r32(gsw, MT7620_GSW_REG_PIAC) & 0xffff; - - return d; -} -EXPORT_SYMBOL_GPL(_mt7620_mii_read); - -int mt7620_mdio_write(struct mii_bus *bus, int phy_addr, int phy_reg, u16 val) -{ - struct mtk_eth *eth = bus->priv; - struct mt7620_gsw *gsw = (struct mt7620_gsw *)eth->sw_priv; - - return _mt7620_mii_write(gsw, phy_addr, phy_reg, val); -} - -int mt7620_mdio_read(struct mii_bus *bus, int phy_addr, int phy_reg) -{ - struct mtk_eth *eth = bus->priv; - struct mt7620_gsw *gsw = (struct mt7620_gsw *)eth->sw_priv; - - return _mt7620_mii_read(gsw, phy_addr, phy_reg); -} - -void mt7530_mdio_w32(struct mt7620_gsw *gsw, u32 reg, u32 val) -{ - _mt7620_mii_write(gsw, 0x1f, 0x1f, (reg >> 6) & 0x3ff); - _mt7620_mii_write(gsw, 0x1f, (reg >> 2) & 0xf, val & 0xffff); - _mt7620_mii_write(gsw, 0x1f, 0x10, val >> 16); -} -EXPORT_SYMBOL_GPL(mt7530_mdio_w32); - -u32 mt7530_mdio_r32(struct mt7620_gsw *gsw, u32 reg) -{ - u16 high, low; - - _mt7620_mii_write(gsw, 0x1f, 0x1f, (reg >> 6) & 0x3ff); - low = _mt7620_mii_read(gsw, 0x1f, (reg >> 2) & 0xf); - high = _mt7620_mii_read(gsw, 0x1f, 0x10); - - return (high << 16) | (low & 0xffff); -} -EXPORT_SYMBOL_GPL(mt7530_mdio_r32); - -void mt7530_mdio_m32(struct mt7620_gsw *gsw, u32 mask, u32 set, u32 reg) -{ - u32 val = mt7530_mdio_r32(gsw, reg); - - val &= ~mask; - val |= set; - mt7530_mdio_w32(gsw, reg, val); -} -EXPORT_SYMBOL_GPL(mt7530_mdio_m32); - -static unsigned char *mtk_speed_str(int speed) -{ - switch (speed) { - case 2: - case SPEED_1000: - return "1000"; - case 1: - case SPEED_100: - return "100"; - case 0: - case SPEED_10: - return "10"; - } - - return "? "; -} - -int mt7620_has_carrier(struct mtk_eth *eth) -{ - struct mt7620_gsw *gsw = (struct mt7620_gsw *)eth->sw_priv; - int i; - - for (i = 0; i < GSW_PORT6; i++) - if (mt7530_mdio_r32(gsw, GSW_REG_PORT_STATUS(i)) & 0x1) - return 1; - return 0; -} - -void mt7620_print_link_state(struct mtk_eth *eth, int port, int link, - int speed, int duplex) -{ - struct mt7620_gsw *gsw = eth->sw_priv; - - if (link) - dev_info(gsw->dev, "port %d link up (%sMbps/%s duplex)\n", - port, mtk_speed_str(speed), - (duplex) ? "Full" : "Half"); - else - dev_info(gsw->dev, "port %d link down\n", port); -} - -void mt7620_mdio_link_adjust(struct mtk_eth *eth, int port) -{ - mt7620_print_link_state(eth, port, eth->link[port], - eth->phy->speed[port], - (eth->phy->duplex[port] == DUPLEX_FULL)); -} diff --git a/drivers/staging/mt7621-eth/mtk_eth_soc.c b/drivers/staging/mt7621-eth/mtk_eth_soc.c deleted file mode 100644 index 6027b19f7bc2..000000000000 --- a/drivers/staging/mt7621-eth/mtk_eth_soc.c +++ /dev/null @@ -1,2176 +0,0 @@ -/* This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Copyright (C) 2009-2016 John Crispin - * Copyright (C) 2009-2016 Felix Fietkau - * Copyright (C) 2013-2016 Michael Lee - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "mtk_eth_soc.h" -#include "mdio.h" -#include "ethtool.h" - -#define MAX_RX_LENGTH 1536 -#define MTK_RX_ETH_HLEN (VLAN_ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) -#define MTK_RX_HLEN (NET_SKB_PAD + MTK_RX_ETH_HLEN + NET_IP_ALIGN) -#define DMA_DUMMY_DESC 0xffffffff -#define MTK_DEFAULT_MSG_ENABLE \ - (NETIF_MSG_DRV | \ - NETIF_MSG_PROBE | \ - NETIF_MSG_LINK | \ - NETIF_MSG_TIMER | \ - NETIF_MSG_IFDOWN | \ - NETIF_MSG_IFUP | \ - NETIF_MSG_RX_ERR | \ - NETIF_MSG_TX_ERR) - -#define TX_DMA_DESP2_DEF (TX_DMA_LS0 | TX_DMA_DONE) -#define NEXT_TX_DESP_IDX(X) (((X) + 1) & (ring->tx_ring_size - 1)) -#define NEXT_RX_DESP_IDX(X) (((X) + 1) & (ring->rx_ring_size - 1)) - -#define SYSC_REG_RSTCTRL 0x34 - -static int mtk_msg_level = -1; -module_param_named(msg_level, mtk_msg_level, int, 0); -MODULE_PARM_DESC(msg_level, "Message level (-1=defaults,0=none,...,16=all)"); - -static const u16 mtk_reg_table_default[MTK_REG_COUNT] = { - [MTK_REG_PDMA_GLO_CFG] = MTK_PDMA_GLO_CFG, - [MTK_REG_PDMA_RST_CFG] = MTK_PDMA_RST_CFG, - [MTK_REG_DLY_INT_CFG] = MTK_DLY_INT_CFG, - [MTK_REG_TX_BASE_PTR0] = MTK_TX_BASE_PTR0, - [MTK_REG_TX_MAX_CNT0] = MTK_TX_MAX_CNT0, - [MTK_REG_TX_CTX_IDX0] = MTK_TX_CTX_IDX0, - [MTK_REG_TX_DTX_IDX0] = MTK_TX_DTX_IDX0, - [MTK_REG_RX_BASE_PTR0] = MTK_RX_BASE_PTR0, - [MTK_REG_RX_MAX_CNT0] = MTK_RX_MAX_CNT0, - [MTK_REG_RX_CALC_IDX0] = MTK_RX_CALC_IDX0, - [MTK_REG_RX_DRX_IDX0] = MTK_RX_DRX_IDX0, - [MTK_REG_MTK_INT_ENABLE] = MTK_INT_ENABLE, - [MTK_REG_MTK_INT_STATUS] = MTK_INT_STATUS, - [MTK_REG_MTK_DMA_VID_BASE] = MTK_DMA_VID0, - [MTK_REG_MTK_COUNTER_BASE] = MTK_GDMA1_TX_GBCNT, - [MTK_REG_MTK_RST_GL] = MTK_RST_GL, -}; - -static const u16 *mtk_reg_table = mtk_reg_table_default; - -void mtk_w32(struct mtk_eth *eth, u32 val, unsigned int reg) -{ - __raw_writel(val, eth->base + reg); -} - -u32 mtk_r32(struct mtk_eth *eth, unsigned int reg) -{ - return __raw_readl(eth->base + reg); -} - -static void mtk_reg_w32(struct mtk_eth *eth, u32 val, enum mtk_reg reg) -{ - mtk_w32(eth, val, mtk_reg_table[reg]); -} - -static u32 mtk_reg_r32(struct mtk_eth *eth, enum mtk_reg reg) -{ - return mtk_r32(eth, mtk_reg_table[reg]); -} - -/* these bits are also exposed via the reset-controller API. however the switch - * and FE need to be brought out of reset in the exakt same moemtn and the - * reset-controller api does not provide this feature yet. Do the reset manually - * until we fixed the reset-controller api to be able to do this - */ -void mtk_reset(struct mtk_eth *eth, u32 reset_bits) -{ - u32 val; - - regmap_read(eth->ethsys, SYSC_REG_RSTCTRL, &val); - val |= reset_bits; - regmap_write(eth->ethsys, SYSC_REG_RSTCTRL, val); - usleep_range(10, 20); - val &= ~reset_bits; - regmap_write(eth->ethsys, SYSC_REG_RSTCTRL, val); - usleep_range(10, 20); -} -EXPORT_SYMBOL(mtk_reset); - -static inline void mtk_irq_ack(struct mtk_eth *eth, u32 mask) -{ - if (eth->soc->dma_type & MTK_PDMA) - mtk_reg_w32(eth, mask, MTK_REG_MTK_INT_STATUS); - if (eth->soc->dma_type & MTK_QDMA) - mtk_w32(eth, mask, MTK_QMTK_INT_STATUS); -} - -static inline u32 mtk_irq_pending(struct mtk_eth *eth) -{ - u32 status = 0; - - if (eth->soc->dma_type & MTK_PDMA) - status |= mtk_reg_r32(eth, MTK_REG_MTK_INT_STATUS); - if (eth->soc->dma_type & MTK_QDMA) - status |= mtk_r32(eth, MTK_QMTK_INT_STATUS); - - return status; -} - -static void mtk_irq_ack_status(struct mtk_eth *eth, u32 mask) -{ - u32 status_reg = MTK_REG_MTK_INT_STATUS; - - if (mtk_reg_table[MTK_REG_MTK_INT_STATUS2]) - status_reg = MTK_REG_MTK_INT_STATUS2; - - mtk_reg_w32(eth, mask, status_reg); -} - -static u32 mtk_irq_pending_status(struct mtk_eth *eth) -{ - u32 status_reg = MTK_REG_MTK_INT_STATUS; - - if (mtk_reg_table[MTK_REG_MTK_INT_STATUS2]) - status_reg = MTK_REG_MTK_INT_STATUS2; - - return mtk_reg_r32(eth, status_reg); -} - -static inline void mtk_irq_disable(struct mtk_eth *eth, u32 mask) -{ - u32 val; - - if (eth->soc->dma_type & MTK_PDMA) { - val = mtk_reg_r32(eth, MTK_REG_MTK_INT_ENABLE); - mtk_reg_w32(eth, val & ~mask, MTK_REG_MTK_INT_ENABLE); - /* flush write */ - mtk_reg_r32(eth, MTK_REG_MTK_INT_ENABLE); - } - if (eth->soc->dma_type & MTK_QDMA) { - val = mtk_r32(eth, MTK_QMTK_INT_ENABLE); - mtk_w32(eth, val & ~mask, MTK_QMTK_INT_ENABLE); - /* flush write */ - mtk_r32(eth, MTK_QMTK_INT_ENABLE); - } -} - -static inline void mtk_irq_enable(struct mtk_eth *eth, u32 mask) -{ - u32 val; - - if (eth->soc->dma_type & MTK_PDMA) { - val = mtk_reg_r32(eth, MTK_REG_MTK_INT_ENABLE); - mtk_reg_w32(eth, val | mask, MTK_REG_MTK_INT_ENABLE); - /* flush write */ - mtk_reg_r32(eth, MTK_REG_MTK_INT_ENABLE); - } - if (eth->soc->dma_type & MTK_QDMA) { - val = mtk_r32(eth, MTK_QMTK_INT_ENABLE); - mtk_w32(eth, val | mask, MTK_QMTK_INT_ENABLE); - /* flush write */ - mtk_r32(eth, MTK_QMTK_INT_ENABLE); - } -} - -static inline u32 mtk_irq_enabled(struct mtk_eth *eth) -{ - u32 enabled = 0; - - if (eth->soc->dma_type & MTK_PDMA) - enabled |= mtk_reg_r32(eth, MTK_REG_MTK_INT_ENABLE); - if (eth->soc->dma_type & MTK_QDMA) - enabled |= mtk_r32(eth, MTK_QMTK_INT_ENABLE); - - return enabled; -} - -static inline void mtk_hw_set_macaddr(struct mtk_mac *mac, - unsigned char *macaddr) -{ - unsigned long flags; - - spin_lock_irqsave(&mac->hw->page_lock, flags); - mtk_w32(mac->hw, (macaddr[0] << 8) | macaddr[1], MTK_GDMA1_MAC_ADRH); - mtk_w32(mac->hw, (macaddr[2] << 24) | (macaddr[3] << 16) | - (macaddr[4] << 8) | macaddr[5], - MTK_GDMA1_MAC_ADRL); - spin_unlock_irqrestore(&mac->hw->page_lock, flags); -} - -static int mtk_set_mac_address(struct net_device *dev, void *p) -{ - int ret = eth_mac_addr(dev, p); - struct mtk_mac *mac = netdev_priv(dev); - struct mtk_eth *eth = mac->hw; - - if (ret) - return ret; - - if (eth->soc->set_mac) - eth->soc->set_mac(mac, dev->dev_addr); - else - mtk_hw_set_macaddr(mac, p); - - return 0; -} - -static inline int mtk_max_frag_size(int mtu) -{ - /* make sure buf_size will be at least MAX_RX_LENGTH */ - if (mtu + MTK_RX_ETH_HLEN < MAX_RX_LENGTH) - mtu = MAX_RX_LENGTH - MTK_RX_ETH_HLEN; - - return SKB_DATA_ALIGN(MTK_RX_HLEN + mtu) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); -} - -static inline int mtk_max_buf_size(int frag_size) -{ - int buf_size = frag_size - NET_SKB_PAD - NET_IP_ALIGN - - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - - WARN_ON(buf_size < MAX_RX_LENGTH); - - return buf_size; -} - -static inline void mtk_get_rxd(struct mtk_rx_dma *rxd, - struct mtk_rx_dma *dma_rxd) -{ - rxd->rxd1 = READ_ONCE(dma_rxd->rxd1); - rxd->rxd2 = READ_ONCE(dma_rxd->rxd2); - rxd->rxd3 = READ_ONCE(dma_rxd->rxd3); - rxd->rxd4 = READ_ONCE(dma_rxd->rxd4); -} - -static inline void mtk_set_txd_pdma(struct mtk_tx_dma *txd, - struct mtk_tx_dma *dma_txd) -{ - WRITE_ONCE(dma_txd->txd1, txd->txd1); - WRITE_ONCE(dma_txd->txd3, txd->txd3); - WRITE_ONCE(dma_txd->txd4, txd->txd4); - /* clean dma done flag last */ - WRITE_ONCE(dma_txd->txd2, txd->txd2); -} - -static void mtk_clean_rx(struct mtk_eth *eth, struct mtk_rx_ring *ring) -{ - int i; - - if (ring->rx_data && ring->rx_dma) { - for (i = 0; i < ring->rx_ring_size; i++) { - if (!ring->rx_data[i]) - continue; - if (!ring->rx_dma[i].rxd1) - continue; - dma_unmap_single(eth->dev, - ring->rx_dma[i].rxd1, - ring->rx_buf_size, - DMA_FROM_DEVICE); - skb_free_frag(ring->rx_data[i]); - } - kfree(ring->rx_data); - ring->rx_data = NULL; - } - - if (ring->rx_dma) { - dma_free_coherent(eth->dev, - ring->rx_ring_size * sizeof(*ring->rx_dma), - ring->rx_dma, - ring->rx_phys); - ring->rx_dma = NULL; - } -} - -static int mtk_dma_rx_alloc(struct mtk_eth *eth, struct mtk_rx_ring *ring) -{ - int i, pad = 0; - - ring->frag_size = mtk_max_frag_size(ETH_DATA_LEN); - ring->rx_buf_size = mtk_max_buf_size(ring->frag_size); - ring->rx_ring_size = eth->soc->dma_ring_size; - ring->rx_data = kcalloc(ring->rx_ring_size, sizeof(*ring->rx_data), - GFP_KERNEL); - if (!ring->rx_data) - goto no_rx_mem; - - for (i = 0; i < ring->rx_ring_size; i++) { - ring->rx_data[i] = netdev_alloc_frag(ring->frag_size); - if (!ring->rx_data[i]) - goto no_rx_mem; - } - - ring->rx_dma = - dma_alloc_coherent(eth->dev, - ring->rx_ring_size * sizeof(*ring->rx_dma), - &ring->rx_phys, GFP_ATOMIC | __GFP_ZERO); - if (!ring->rx_dma) - goto no_rx_mem; - - if (!eth->soc->rx_2b_offset) - pad = NET_IP_ALIGN; - - for (i = 0; i < ring->rx_ring_size; i++) { - dma_addr_t dma_addr = dma_map_single(eth->dev, - ring->rx_data[i] + NET_SKB_PAD + pad, - ring->rx_buf_size, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(eth->dev, dma_addr))) - goto no_rx_mem; - ring->rx_dma[i].rxd1 = (unsigned int)dma_addr; - - if (eth->soc->rx_sg_dma) - ring->rx_dma[i].rxd2 = RX_DMA_PLEN0(ring->rx_buf_size); - else - ring->rx_dma[i].rxd2 = RX_DMA_LSO; - } - ring->rx_calc_idx = ring->rx_ring_size - 1; - /* make sure that all changes to the dma ring are flushed before we - * continue - */ - wmb(); - - return 0; - -no_rx_mem: - return -ENOMEM; -} - -static void mtk_txd_unmap(struct device *dev, struct mtk_tx_buf *tx_buf) -{ - if (tx_buf->flags & MTK_TX_FLAGS_SINGLE0) { - dma_unmap_single(dev, - dma_unmap_addr(tx_buf, dma_addr0), - dma_unmap_len(tx_buf, dma_len0), - DMA_TO_DEVICE); - } else if (tx_buf->flags & MTK_TX_FLAGS_PAGE0) { - dma_unmap_page(dev, - dma_unmap_addr(tx_buf, dma_addr0), - dma_unmap_len(tx_buf, dma_len0), - DMA_TO_DEVICE); - } - if (tx_buf->flags & MTK_TX_FLAGS_PAGE1) - dma_unmap_page(dev, - dma_unmap_addr(tx_buf, dma_addr1), - dma_unmap_len(tx_buf, dma_len1), - DMA_TO_DEVICE); - - tx_buf->flags = 0; - if (tx_buf->skb && (tx_buf->skb != (struct sk_buff *)DMA_DUMMY_DESC)) - dev_kfree_skb_any(tx_buf->skb); - tx_buf->skb = NULL; -} - -static void mtk_pdma_tx_clean(struct mtk_eth *eth) -{ - struct mtk_tx_ring *ring = ð->tx_ring; - int i; - - if (ring->tx_buf) { - for (i = 0; i < ring->tx_ring_size; i++) - mtk_txd_unmap(eth->dev, &ring->tx_buf[i]); - kfree(ring->tx_buf); - ring->tx_buf = NULL; - } - - if (ring->tx_dma) { - dma_free_coherent(eth->dev, - ring->tx_ring_size * sizeof(*ring->tx_dma), - ring->tx_dma, - ring->tx_phys); - ring->tx_dma = NULL; - } -} - -static void mtk_qdma_tx_clean(struct mtk_eth *eth) -{ - struct mtk_tx_ring *ring = ð->tx_ring; - int i; - - if (ring->tx_buf) { - for (i = 0; i < ring->tx_ring_size; i++) - mtk_txd_unmap(eth->dev, &ring->tx_buf[i]); - kfree(ring->tx_buf); - ring->tx_buf = NULL; - } - - if (ring->tx_dma) { - dma_free_coherent(eth->dev, - ring->tx_ring_size * sizeof(*ring->tx_dma), - ring->tx_dma, - ring->tx_phys); - ring->tx_dma = NULL; - } -} - -void mtk_stats_update_mac(struct mtk_mac *mac) -{ - struct mtk_hw_stats *hw_stats = mac->hw_stats; - unsigned int base = mtk_reg_table[MTK_REG_MTK_COUNTER_BASE]; - u64 stats; - - base += hw_stats->reg_offset; - - u64_stats_update_begin(&hw_stats->syncp); - - if (mac->hw->soc->new_stats) { - hw_stats->rx_bytes += mtk_r32(mac->hw, base); - stats = mtk_r32(mac->hw, base + 0x04); - if (stats) - hw_stats->rx_bytes += (stats << 32); - hw_stats->rx_packets += mtk_r32(mac->hw, base + 0x08); - hw_stats->rx_overflow += mtk_r32(mac->hw, base + 0x10); - hw_stats->rx_fcs_errors += mtk_r32(mac->hw, base + 0x14); - hw_stats->rx_short_errors += mtk_r32(mac->hw, base + 0x18); - hw_stats->rx_long_errors += mtk_r32(mac->hw, base + 0x1c); - hw_stats->rx_checksum_errors += mtk_r32(mac->hw, base + 0x20); - hw_stats->rx_flow_control_packets += - mtk_r32(mac->hw, base + 0x24); - hw_stats->tx_skip += mtk_r32(mac->hw, base + 0x28); - hw_stats->tx_collisions += mtk_r32(mac->hw, base + 0x2c); - hw_stats->tx_bytes += mtk_r32(mac->hw, base + 0x30); - stats = mtk_r32(mac->hw, base + 0x34); - if (stats) - hw_stats->tx_bytes += (stats << 32); - hw_stats->tx_packets += mtk_r32(mac->hw, base + 0x38); - } else { - hw_stats->tx_bytes += mtk_r32(mac->hw, base); - hw_stats->tx_packets += mtk_r32(mac->hw, base + 0x04); - hw_stats->tx_skip += mtk_r32(mac->hw, base + 0x08); - hw_stats->tx_collisions += mtk_r32(mac->hw, base + 0x0c); - hw_stats->rx_bytes += mtk_r32(mac->hw, base + 0x20); - hw_stats->rx_packets += mtk_r32(mac->hw, base + 0x24); - hw_stats->rx_overflow += mtk_r32(mac->hw, base + 0x28); - hw_stats->rx_fcs_errors += mtk_r32(mac->hw, base + 0x2c); - hw_stats->rx_short_errors += mtk_r32(mac->hw, base + 0x30); - hw_stats->rx_long_errors += mtk_r32(mac->hw, base + 0x34); - hw_stats->rx_checksum_errors += mtk_r32(mac->hw, base + 0x38); - hw_stats->rx_flow_control_packets += - mtk_r32(mac->hw, base + 0x3c); - } - - u64_stats_update_end(&hw_stats->syncp); -} - -static void mtk_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *storage) -{ - struct mtk_mac *mac = netdev_priv(dev); - struct mtk_hw_stats *hw_stats = mac->hw_stats; - unsigned int base = mtk_reg_table[MTK_REG_MTK_COUNTER_BASE]; - unsigned int start; - - if (!base) { - netdev_stats_to_stats64(storage, &dev->stats); - return; - } - - if (netif_running(dev) && netif_device_present(dev)) { - if (spin_trylock(&hw_stats->stats_lock)) { - mtk_stats_update_mac(mac); - spin_unlock(&hw_stats->stats_lock); - } - } - - do { - start = u64_stats_fetch_begin_irq(&hw_stats->syncp); - storage->rx_packets = hw_stats->rx_packets; - storage->tx_packets = hw_stats->tx_packets; - storage->rx_bytes = hw_stats->rx_bytes; - storage->tx_bytes = hw_stats->tx_bytes; - storage->collisions = hw_stats->tx_collisions; - storage->rx_length_errors = hw_stats->rx_short_errors + - hw_stats->rx_long_errors; - storage->rx_over_errors = hw_stats->rx_overflow; - storage->rx_crc_errors = hw_stats->rx_fcs_errors; - storage->rx_errors = hw_stats->rx_checksum_errors; - storage->tx_aborted_errors = hw_stats->tx_skip; - } while (u64_stats_fetch_retry_irq(&hw_stats->syncp, start)); - - storage->tx_errors = dev->stats.tx_errors; - storage->rx_dropped = dev->stats.rx_dropped; - storage->tx_dropped = dev->stats.tx_dropped; -} - -static int mtk_vlan_rx_add_vid(struct net_device *dev, - __be16 proto, u16 vid) -{ - struct mtk_mac *mac = netdev_priv(dev); - struct mtk_eth *eth = mac->hw; - u32 idx = (vid & 0xf); - u32 vlan_cfg; - - if (!((mtk_reg_table[MTK_REG_MTK_DMA_VID_BASE]) && - (dev->features & NETIF_F_HW_VLAN_CTAG_TX))) - return 0; - - if (test_bit(idx, ð->vlan_map)) { - netdev_warn(dev, "disable tx vlan offload\n"); - dev->wanted_features &= ~NETIF_F_HW_VLAN_CTAG_TX; - netdev_update_features(dev); - } else { - vlan_cfg = mtk_r32(eth, - mtk_reg_table[MTK_REG_MTK_DMA_VID_BASE] + - ((idx >> 1) << 2)); - if (idx & 0x1) { - vlan_cfg &= 0xffff; - vlan_cfg |= (vid << 16); - } else { - vlan_cfg &= 0xffff0000; - vlan_cfg |= vid; - } - mtk_w32(eth, - vlan_cfg, mtk_reg_table[MTK_REG_MTK_DMA_VID_BASE] + - ((idx >> 1) << 2)); - set_bit(idx, ð->vlan_map); - } - - return 0; -} - -static int mtk_vlan_rx_kill_vid(struct net_device *dev, - __be16 proto, u16 vid) -{ - struct mtk_mac *mac = netdev_priv(dev); - struct mtk_eth *eth = mac->hw; - u32 idx = (vid & 0xf); - - if (!((mtk_reg_table[MTK_REG_MTK_DMA_VID_BASE]) && - (dev->features & NETIF_F_HW_VLAN_CTAG_TX))) - return 0; - - clear_bit(idx, ð->vlan_map); - - return 0; -} - -static inline u32 mtk_pdma_empty_txd(struct mtk_tx_ring *ring) -{ - barrier(); - return (u32)(ring->tx_ring_size - - ((ring->tx_next_idx - ring->tx_free_idx) & - (ring->tx_ring_size - 1))); -} - -static int mtk_skb_padto(struct sk_buff *skb, struct mtk_eth *eth) -{ - unsigned int len; - int ret; - - if (unlikely(skb->len >= VLAN_ETH_ZLEN)) - return 0; - - if (eth->soc->padding_64b && !eth->soc->padding_bug) - return 0; - - if (skb_vlan_tag_present(skb)) - len = ETH_ZLEN; - else if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) - len = VLAN_ETH_ZLEN; - else if (!eth->soc->padding_64b) - len = ETH_ZLEN; - else - return 0; - - if (skb->len >= len) - return 0; - - ret = skb_pad(skb, len - skb->len); - if (ret < 0) - return ret; - skb->len = len; - skb_set_tail_pointer(skb, len); - - return ret; -} - -static int mtk_pdma_tx_map(struct sk_buff *skb, struct net_device *dev, - int tx_num, struct mtk_tx_ring *ring, bool gso) -{ - struct mtk_mac *mac = netdev_priv(dev); - struct mtk_eth *eth = mac->hw; - struct skb_frag_struct *frag; - struct mtk_tx_dma txd, *ptxd; - struct mtk_tx_buf *tx_buf; - int i, j, k, frag_size, frag_map_size, offset; - dma_addr_t mapped_addr; - unsigned int nr_frags; - u32 def_txd4; - - if (mtk_skb_padto(skb, eth)) { - netif_warn(eth, tx_err, dev, "tx padding failed!\n"); - return -1; - } - - tx_buf = &ring->tx_buf[ring->tx_next_idx]; - memset(tx_buf, 0, sizeof(*tx_buf)); - memset(&txd, 0, sizeof(txd)); - nr_frags = skb_shinfo(skb)->nr_frags; - - /* init tx descriptor */ - def_txd4 = eth->soc->txd4; - txd.txd4 = def_txd4; - - if (eth->soc->mac_count > 1) - txd.txd4 |= (mac->id + 1) << TX_DMA_FPORT_SHIFT; - - if (gso) - txd.txd4 |= TX_DMA_TSO; - - /* TX Checksum offload */ - if (skb->ip_summed == CHECKSUM_PARTIAL) - txd.txd4 |= TX_DMA_CHKSUM; - - /* VLAN header offload */ - if (skb_vlan_tag_present(skb)) { - u16 tag = skb_vlan_tag_get(skb); - - txd.txd4 |= TX_DMA_INS_VLAN | - ((tag >> VLAN_PRIO_SHIFT) << 4) | - (tag & 0xF); - } - - mapped_addr = dma_map_single(&dev->dev, skb->data, - skb_headlen(skb), DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(&dev->dev, mapped_addr))) - return -1; - - txd.txd1 = mapped_addr; - txd.txd2 = TX_DMA_PLEN0(skb_headlen(skb)); - - tx_buf->flags |= MTK_TX_FLAGS_SINGLE0; - dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr); - dma_unmap_len_set(tx_buf, dma_len0, skb_headlen(skb)); - - /* TX SG offload */ - j = ring->tx_next_idx; - k = 0; - for (i = 0; i < nr_frags; i++) { - offset = 0; - frag = &skb_shinfo(skb)->frags[i]; - frag_size = skb_frag_size(frag); - - while (frag_size > 0) { - frag_map_size = min(frag_size, TX_DMA_BUF_LEN); - mapped_addr = skb_frag_dma_map(&dev->dev, frag, offset, - frag_map_size, - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(&dev->dev, mapped_addr))) - goto err_dma; - - if (k & 0x1) { - j = NEXT_TX_DESP_IDX(j); - txd.txd1 = mapped_addr; - txd.txd2 = TX_DMA_PLEN0(frag_map_size); - txd.txd4 = def_txd4; - - tx_buf = &ring->tx_buf[j]; - memset(tx_buf, 0, sizeof(*tx_buf)); - - tx_buf->flags |= MTK_TX_FLAGS_PAGE0; - dma_unmap_addr_set(tx_buf, dma_addr0, - mapped_addr); - dma_unmap_len_set(tx_buf, dma_len0, - frag_map_size); - } else { - txd.txd3 = mapped_addr; - txd.txd2 |= TX_DMA_PLEN1(frag_map_size); - - tx_buf->skb = (struct sk_buff *)DMA_DUMMY_DESC; - tx_buf->flags |= MTK_TX_FLAGS_PAGE1; - dma_unmap_addr_set(tx_buf, dma_addr1, - mapped_addr); - dma_unmap_len_set(tx_buf, dma_len1, - frag_map_size); - - if (!((i == (nr_frags - 1)) && - (frag_map_size == frag_size))) { - mtk_set_txd_pdma(&txd, - &ring->tx_dma[j]); - memset(&txd, 0, sizeof(txd)); - } - } - frag_size -= frag_map_size; - offset += frag_map_size; - k++; - } - } - - /* set last segment */ - if (k & 0x1) - txd.txd2 |= TX_DMA_LS1; - else - txd.txd2 |= TX_DMA_LS0; - mtk_set_txd_pdma(&txd, &ring->tx_dma[j]); - - /* store skb to cleanup */ - tx_buf->skb = skb; - - netdev_sent_queue(dev, skb->len); - skb_tx_timestamp(skb); - - ring->tx_next_idx = NEXT_TX_DESP_IDX(j); - /* make sure that all changes to the dma ring are flushed before we - * continue - */ - wmb(); - atomic_set(&ring->tx_free_count, mtk_pdma_empty_txd(ring)); - - if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)) || !skb->xmit_more) - mtk_reg_w32(eth, ring->tx_next_idx, MTK_REG_TX_CTX_IDX0); - - return 0; - -err_dma: - j = ring->tx_next_idx; - for (i = 0; i < tx_num; i++) { - ptxd = &ring->tx_dma[j]; - tx_buf = &ring->tx_buf[j]; - - /* unmap dma */ - mtk_txd_unmap(&dev->dev, tx_buf); - - ptxd->txd2 = TX_DMA_DESP2_DEF; - j = NEXT_TX_DESP_IDX(j); - } - /* make sure that all changes to the dma ring are flushed before we - * continue - */ - wmb(); - return -1; -} - -/* the qdma core needs scratch memory to be setup */ -static int mtk_init_fq_dma(struct mtk_eth *eth) -{ - dma_addr_t dma_addr, phy_ring_head, phy_ring_tail; - int cnt = eth->soc->dma_ring_size; - int i; - - eth->scratch_ring = dma_alloc_coherent(eth->dev, - cnt * sizeof(struct mtk_tx_dma), - &phy_ring_head, - GFP_ATOMIC | __GFP_ZERO); - if (unlikely(!eth->scratch_ring)) - return -ENOMEM; - - eth->scratch_head = kcalloc(cnt, QDMA_PAGE_SIZE, - GFP_KERNEL); - dma_addr = dma_map_single(eth->dev, - eth->scratch_head, cnt * QDMA_PAGE_SIZE, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(eth->dev, dma_addr))) - return -ENOMEM; - - memset(eth->scratch_ring, 0x0, sizeof(struct mtk_tx_dma) * cnt); - phy_ring_tail = phy_ring_head + (sizeof(struct mtk_tx_dma) * (cnt - 1)); - - for (i = 0; i < cnt; i++) { - eth->scratch_ring[i].txd1 = (dma_addr + (i * QDMA_PAGE_SIZE)); - if (i < cnt - 1) - eth->scratch_ring[i].txd2 = (phy_ring_head + - ((i + 1) * sizeof(struct mtk_tx_dma))); - eth->scratch_ring[i].txd3 = TX_QDMA_SDL(QDMA_PAGE_SIZE); - } - - mtk_w32(eth, phy_ring_head, MTK_QDMA_FQ_HEAD); - mtk_w32(eth, phy_ring_tail, MTK_QDMA_FQ_TAIL); - mtk_w32(eth, (cnt << 16) | cnt, MTK_QDMA_FQ_CNT); - mtk_w32(eth, QDMA_PAGE_SIZE << 16, MTK_QDMA_FQ_BLEN); - - return 0; -} - -static void *mtk_qdma_phys_to_virt(struct mtk_tx_ring *ring, u32 desc) -{ - void *ret = ring->tx_dma; - - return ret + (desc - ring->tx_phys); -} - -static struct mtk_tx_dma *mtk_tx_next_qdma(struct mtk_tx_ring *ring, - struct mtk_tx_dma *txd) -{ - return mtk_qdma_phys_to_virt(ring, txd->txd2); -} - -static struct mtk_tx_buf *mtk_desc_to_tx_buf(struct mtk_tx_ring *ring, - struct mtk_tx_dma *txd) -{ - int idx = txd - ring->tx_dma; - - return &ring->tx_buf[idx]; -} - -static int mtk_qdma_tx_map(struct sk_buff *skb, struct net_device *dev, - int tx_num, struct mtk_tx_ring *ring, bool gso) -{ - struct mtk_mac *mac = netdev_priv(dev); - struct mtk_eth *eth = mac->hw; - struct mtk_tx_dma *itxd, *txd; - struct mtk_tx_buf *tx_buf; - dma_addr_t mapped_addr; - unsigned int nr_frags; - int i, n_desc = 1; - u32 txd4 = eth->soc->txd4; - - itxd = ring->tx_next_free; - if (itxd == ring->tx_last_free) - return -ENOMEM; - - if (eth->soc->mac_count > 1) - txd4 |= (mac->id + 1) << TX_DMA_FPORT_SHIFT; - - tx_buf = mtk_desc_to_tx_buf(ring, itxd); - memset(tx_buf, 0, sizeof(*tx_buf)); - - if (gso) - txd4 |= TX_DMA_TSO; - - /* TX Checksum offload */ - if (skb->ip_summed == CHECKSUM_PARTIAL) - txd4 |= TX_DMA_CHKSUM; - - /* VLAN header offload */ - if (skb_vlan_tag_present(skb)) - txd4 |= TX_DMA_INS_VLAN_MT7621 | skb_vlan_tag_get(skb); - - mapped_addr = dma_map_single(&dev->dev, skb->data, - skb_headlen(skb), DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(&dev->dev, mapped_addr))) - return -ENOMEM; - - WRITE_ONCE(itxd->txd1, mapped_addr); - tx_buf->flags |= MTK_TX_FLAGS_SINGLE0; - dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr); - dma_unmap_len_set(tx_buf, dma_len0, skb_headlen(skb)); - - /* TX SG offload */ - txd = itxd; - nr_frags = skb_shinfo(skb)->nr_frags; - for (i = 0; i < nr_frags; i++) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; - unsigned int offset = 0; - int frag_size = skb_frag_size(frag); - - while (frag_size) { - bool last_frag = false; - unsigned int frag_map_size; - - txd = mtk_tx_next_qdma(ring, txd); - if (txd == ring->tx_last_free) - goto err_dma; - - n_desc++; - frag_map_size = min(frag_size, TX_DMA_BUF_LEN); - mapped_addr = skb_frag_dma_map(&dev->dev, frag, offset, - frag_map_size, - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(&dev->dev, mapped_addr))) - goto err_dma; - - if (i == nr_frags - 1 && - (frag_size - frag_map_size) == 0) - last_frag = true; - - WRITE_ONCE(txd->txd1, mapped_addr); - WRITE_ONCE(txd->txd3, (QDMA_TX_SWC | - TX_DMA_PLEN0(frag_map_size) | - last_frag * TX_DMA_LS0) | - mac->id); - WRITE_ONCE(txd->txd4, 0); - - tx_buf->skb = (struct sk_buff *)DMA_DUMMY_DESC; - tx_buf = mtk_desc_to_tx_buf(ring, txd); - memset(tx_buf, 0, sizeof(*tx_buf)); - - tx_buf->flags |= MTK_TX_FLAGS_PAGE0; - dma_unmap_addr_set(tx_buf, dma_addr0, mapped_addr); - dma_unmap_len_set(tx_buf, dma_len0, frag_map_size); - frag_size -= frag_map_size; - offset += frag_map_size; - } - } - - /* store skb to cleanup */ - tx_buf->skb = skb; - - WRITE_ONCE(itxd->txd4, txd4); - WRITE_ONCE(itxd->txd3, (QDMA_TX_SWC | TX_DMA_PLEN0(skb_headlen(skb)) | - (!nr_frags * TX_DMA_LS0))); - - netdev_sent_queue(dev, skb->len); - skb_tx_timestamp(skb); - - ring->tx_next_free = mtk_tx_next_qdma(ring, txd); - atomic_sub(n_desc, &ring->tx_free_count); - - /* make sure that all changes to the dma ring are flushed before we - * continue - */ - wmb(); - - if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)) || !skb->xmit_more) - mtk_w32(eth, txd->txd2, MTK_QTX_CTX_PTR); - - return 0; - -err_dma: - do { - tx_buf = mtk_desc_to_tx_buf(ring, txd); - - /* unmap dma */ - mtk_txd_unmap(&dev->dev, tx_buf); - - itxd->txd3 = TX_DMA_DESP2_DEF; - itxd = mtk_tx_next_qdma(ring, itxd); - } while (itxd != txd); - - return -ENOMEM; -} - -static inline int mtk_cal_txd_req(struct sk_buff *skb) -{ - int i, nfrags; - struct skb_frag_struct *frag; - - nfrags = 1; - if (skb_is_gso(skb)) { - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - frag = &skb_shinfo(skb)->frags[i]; - nfrags += DIV_ROUND_UP(frag->size, TX_DMA_BUF_LEN); - } - } else { - nfrags += skb_shinfo(skb)->nr_frags; - } - - return DIV_ROUND_UP(nfrags, 2); -} - -static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct mtk_mac *mac = netdev_priv(dev); - struct mtk_eth *eth = mac->hw; - struct mtk_tx_ring *ring = ð->tx_ring; - struct net_device_stats *stats = &dev->stats; - int tx_num; - int len = skb->len; - bool gso = false; - - tx_num = mtk_cal_txd_req(skb); - if (unlikely(atomic_read(&ring->tx_free_count) <= tx_num)) { - netif_stop_queue(dev); - netif_err(eth, tx_queued, dev, - "Tx Ring full when queue awake!\n"); - return NETDEV_TX_BUSY; - } - - /* TSO: fill MSS info in tcp checksum field */ - if (skb_is_gso(skb)) { - if (skb_cow_head(skb, 0)) { - netif_warn(eth, tx_err, dev, - "GSO expand head fail.\n"); - goto drop; - } - - if (skb_shinfo(skb)->gso_type & - (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)) { - gso = true; - tcp_hdr(skb)->check = htons(skb_shinfo(skb)->gso_size); - } - } - - if (ring->tx_map(skb, dev, tx_num, ring, gso) < 0) - goto drop; - - stats->tx_packets++; - stats->tx_bytes += len; - - if (unlikely(atomic_read(&ring->tx_free_count) <= ring->tx_thresh)) { - netif_stop_queue(dev); - smp_mb(); - if (unlikely(atomic_read(&ring->tx_free_count) > - ring->tx_thresh)) - netif_wake_queue(dev); - } - - return NETDEV_TX_OK; - -drop: - stats->tx_dropped++; - dev_kfree_skb(skb); - return NETDEV_TX_OK; -} - -static int mtk_poll_rx(struct napi_struct *napi, int budget, - struct mtk_eth *eth, u32 rx_intr) -{ - struct mtk_soc_data *soc = eth->soc; - struct mtk_rx_ring *ring = ð->rx_ring[0]; - int idx = ring->rx_calc_idx; - u32 checksum_bit; - struct sk_buff *skb; - u8 *data, *new_data; - struct mtk_rx_dma *rxd, trxd; - int done = 0, pad; - - if (eth->soc->hw_features & NETIF_F_RXCSUM) - checksum_bit = soc->checksum_bit; - else - checksum_bit = 0; - - if (eth->soc->rx_2b_offset) - pad = 0; - else - pad = NET_IP_ALIGN; - - while (done < budget) { - struct net_device *netdev; - unsigned int pktlen; - dma_addr_t dma_addr; - int mac = 0; - - idx = NEXT_RX_DESP_IDX(idx); - rxd = &ring->rx_dma[idx]; - data = ring->rx_data[idx]; - - mtk_get_rxd(&trxd, rxd); - if (!(trxd.rxd2 & RX_DMA_DONE)) - break; - - /* find out which mac the packet come from. values start at 1 */ - if (eth->soc->mac_count > 1) { - mac = (trxd.rxd4 >> RX_DMA_FPORT_SHIFT) & - RX_DMA_FPORT_MASK; - mac--; - if (mac < 0 || mac >= eth->soc->mac_count) - goto release_desc; - } - - netdev = eth->netdev[mac]; - - /* alloc new buffer */ - new_data = napi_alloc_frag(ring->frag_size); - if (unlikely(!new_data || !netdev)) { - netdev->stats.rx_dropped++; - goto release_desc; - } - dma_addr = dma_map_single(&netdev->dev, - new_data + NET_SKB_PAD + pad, - ring->rx_buf_size, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(&netdev->dev, dma_addr))) { - skb_free_frag(new_data); - goto release_desc; - } - - /* receive data */ - skb = build_skb(data, ring->frag_size); - if (unlikely(!skb)) { - put_page(virt_to_head_page(new_data)); - goto release_desc; - } - skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); - - dma_unmap_single(&netdev->dev, trxd.rxd1, - ring->rx_buf_size, DMA_FROM_DEVICE); - pktlen = RX_DMA_GET_PLEN0(trxd.rxd2); - skb->dev = netdev; - skb_put(skb, pktlen); - if (trxd.rxd4 & checksum_bit) - skb->ip_summed = CHECKSUM_UNNECESSARY; - else - skb_checksum_none_assert(skb); - skb->protocol = eth_type_trans(skb, netdev); - - netdev->stats.rx_packets++; - netdev->stats.rx_bytes += pktlen; - - if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX && - RX_DMA_VID(trxd.rxd3)) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), - RX_DMA_VID(trxd.rxd3)); - napi_gro_receive(napi, skb); - - ring->rx_data[idx] = new_data; - rxd->rxd1 = (unsigned int)dma_addr; - -release_desc: - if (eth->soc->rx_sg_dma) - rxd->rxd2 = RX_DMA_PLEN0(ring->rx_buf_size); - else - rxd->rxd2 = RX_DMA_LSO; - - ring->rx_calc_idx = idx; - /* make sure that all changes to the dma ring are flushed before - * we continue - */ - wmb(); - if (eth->soc->dma_type == MTK_QDMA) - mtk_w32(eth, ring->rx_calc_idx, MTK_QRX_CRX_IDX0); - else - mtk_reg_w32(eth, ring->rx_calc_idx, - MTK_REG_RX_CALC_IDX0); - done++; - } - - if (done < budget) - mtk_irq_ack(eth, rx_intr); - - return done; -} - -static int mtk_pdma_tx_poll(struct mtk_eth *eth, int budget, bool *tx_again) -{ - struct sk_buff *skb; - struct mtk_tx_buf *tx_buf; - int done = 0; - u32 idx, hwidx; - struct mtk_tx_ring *ring = ð->tx_ring; - unsigned int bytes = 0; - - idx = ring->tx_free_idx; - hwidx = mtk_reg_r32(eth, MTK_REG_TX_DTX_IDX0); - - while ((idx != hwidx) && budget) { - tx_buf = &ring->tx_buf[idx]; - skb = tx_buf->skb; - - if (!skb) - break; - - if (skb != (struct sk_buff *)DMA_DUMMY_DESC) { - bytes += skb->len; - done++; - budget--; - } - mtk_txd_unmap(eth->dev, tx_buf); - idx = NEXT_TX_DESP_IDX(idx); - } - ring->tx_free_idx = idx; - atomic_set(&ring->tx_free_count, mtk_pdma_empty_txd(ring)); - - /* read hw index again make sure no new tx packet */ - if (idx != hwidx || idx != mtk_reg_r32(eth, MTK_REG_TX_DTX_IDX0)) - *tx_again = 1; - - if (done) - netdev_completed_queue(*eth->netdev, done, bytes); - - return done; -} - -static int mtk_qdma_tx_poll(struct mtk_eth *eth, int budget, bool *tx_again) -{ - struct mtk_tx_ring *ring = ð->tx_ring; - struct mtk_tx_dma *desc; - struct sk_buff *skb; - struct mtk_tx_buf *tx_buf; - int total = 0, done[MTK_MAX_DEVS]; - unsigned int bytes[MTK_MAX_DEVS]; - u32 cpu, dma; - int i; - - memset(done, 0, sizeof(done)); - memset(bytes, 0, sizeof(bytes)); - - cpu = mtk_r32(eth, MTK_QTX_CRX_PTR); - dma = mtk_r32(eth, MTK_QTX_DRX_PTR); - - desc = mtk_qdma_phys_to_virt(ring, cpu); - - while ((cpu != dma) && budget) { - u32 next_cpu = desc->txd2; - int mac; - - desc = mtk_tx_next_qdma(ring, desc); - if ((desc->txd3 & QDMA_TX_OWNER_CPU) == 0) - break; - - mac = (desc->txd4 >> TX_DMA_FPORT_SHIFT) & - TX_DMA_FPORT_MASK; - mac--; - - tx_buf = mtk_desc_to_tx_buf(ring, desc); - skb = tx_buf->skb; - if (!skb) - break; - - if (skb != (struct sk_buff *)DMA_DUMMY_DESC) { - bytes[mac] += skb->len; - done[mac]++; - budget--; - } - mtk_txd_unmap(eth->dev, tx_buf); - - ring->tx_last_free->txd2 = next_cpu; - ring->tx_last_free = desc; - atomic_inc(&ring->tx_free_count); - - cpu = next_cpu; - } - - mtk_w32(eth, cpu, MTK_QTX_CRX_PTR); - - /* read hw index again make sure no new tx packet */ - if (cpu != dma || cpu != mtk_r32(eth, MTK_QTX_DRX_PTR)) - *tx_again = true; - - for (i = 0; i < eth->soc->mac_count; i++) { - if (!done[i]) - continue; - netdev_completed_queue(eth->netdev[i], done[i], bytes[i]); - total += done[i]; - } - - return total; -} - -static int mtk_poll_tx(struct mtk_eth *eth, int budget, u32 tx_intr, - bool *tx_again) -{ - struct mtk_tx_ring *ring = ð->tx_ring; - struct net_device *netdev = eth->netdev[0]; - int done; - - done = eth->tx_ring.tx_poll(eth, budget, tx_again); - if (!*tx_again) - mtk_irq_ack(eth, tx_intr); - - if (!done) - return 0; - - smp_mb(); - if (unlikely(!netif_queue_stopped(netdev))) - return done; - - if (atomic_read(&ring->tx_free_count) > ring->tx_thresh) - netif_wake_queue(netdev); - - return done; -} - -static void mtk_stats_update(struct mtk_eth *eth) -{ - int i; - - for (i = 0; i < eth->soc->mac_count; i++) { - if (!eth->mac[i] || !eth->mac[i]->hw_stats) - continue; - if (spin_trylock(ð->mac[i]->hw_stats->stats_lock)) { - mtk_stats_update_mac(eth->mac[i]); - spin_unlock(ð->mac[i]->hw_stats->stats_lock); - } - } -} - -static int mtk_poll(struct napi_struct *napi, int budget) -{ - struct mtk_eth *eth = container_of(napi, struct mtk_eth, rx_napi); - u32 status, mtk_status, mask, tx_intr, rx_intr, status_intr; - int tx_done, rx_done; - bool tx_again = false; - - status = mtk_irq_pending(eth); - mtk_status = mtk_irq_pending_status(eth); - tx_intr = eth->soc->tx_int; - rx_intr = eth->soc->rx_int; - status_intr = eth->soc->status_int; - tx_done = 0; - rx_done = 0; - tx_again = 0; - - if (status & tx_intr) - tx_done = mtk_poll_tx(eth, budget, tx_intr, &tx_again); - - if (status & rx_intr) - rx_done = mtk_poll_rx(napi, budget, eth, rx_intr); - - if (unlikely(mtk_status & status_intr)) { - mtk_stats_update(eth); - mtk_irq_ack_status(eth, status_intr); - } - - if (unlikely(netif_msg_intr(eth))) { - mask = mtk_irq_enabled(eth); - netdev_info(eth->netdev[0], - "done tx %d, rx %d, intr 0x%08x/0x%x\n", - tx_done, rx_done, status, mask); - } - - if (tx_again || rx_done == budget) - return budget; - - status = mtk_irq_pending(eth); - if (status & (tx_intr | rx_intr)) - return budget; - - napi_complete(napi); - mtk_irq_enable(eth, tx_intr | rx_intr); - - return rx_done; -} - -static int mtk_pdma_tx_alloc(struct mtk_eth *eth) -{ - int i; - struct mtk_tx_ring *ring = ð->tx_ring; - - ring->tx_ring_size = eth->soc->dma_ring_size; - ring->tx_free_idx = 0; - ring->tx_next_idx = 0; - ring->tx_thresh = max((unsigned long)ring->tx_ring_size >> 2, - MAX_SKB_FRAGS); - - ring->tx_buf = kcalloc(ring->tx_ring_size, sizeof(*ring->tx_buf), - GFP_KERNEL); - if (!ring->tx_buf) - goto no_tx_mem; - - ring->tx_dma = - dma_alloc_coherent(eth->dev, - ring->tx_ring_size * sizeof(*ring->tx_dma), - &ring->tx_phys, GFP_ATOMIC | __GFP_ZERO); - if (!ring->tx_dma) - goto no_tx_mem; - - for (i = 0; i < ring->tx_ring_size; i++) { - ring->tx_dma[i].txd2 = TX_DMA_DESP2_DEF; - ring->tx_dma[i].txd4 = eth->soc->txd4; - } - - atomic_set(&ring->tx_free_count, mtk_pdma_empty_txd(ring)); - ring->tx_map = mtk_pdma_tx_map; - ring->tx_poll = mtk_pdma_tx_poll; - ring->tx_clean = mtk_pdma_tx_clean; - - /* make sure that all changes to the dma ring are flushed before we - * continue - */ - wmb(); - - mtk_reg_w32(eth, ring->tx_phys, MTK_REG_TX_BASE_PTR0); - mtk_reg_w32(eth, ring->tx_ring_size, MTK_REG_TX_MAX_CNT0); - mtk_reg_w32(eth, 0, MTK_REG_TX_CTX_IDX0); - mtk_reg_w32(eth, MTK_PST_DTX_IDX0, MTK_REG_PDMA_RST_CFG); - - return 0; - -no_tx_mem: - return -ENOMEM; -} - -static int mtk_qdma_tx_alloc_tx(struct mtk_eth *eth) -{ - struct mtk_tx_ring *ring = ð->tx_ring; - int i, sz = sizeof(*ring->tx_dma); - - ring->tx_ring_size = eth->soc->dma_ring_size; - ring->tx_buf = kcalloc(ring->tx_ring_size, sizeof(*ring->tx_buf), - GFP_KERNEL); - if (!ring->tx_buf) - goto no_tx_mem; - - ring->tx_dma = dma_alloc_coherent(eth->dev, ring->tx_ring_size * sz, - &ring->tx_phys, - GFP_ATOMIC | __GFP_ZERO); - if (!ring->tx_dma) - goto no_tx_mem; - - for (i = 0; i < ring->tx_ring_size; i++) { - int next = (i + 1) % ring->tx_ring_size; - u32 next_ptr = ring->tx_phys + next * sz; - - ring->tx_dma[i].txd2 = next_ptr; - ring->tx_dma[i].txd3 = TX_DMA_DESP2_DEF; - } - - atomic_set(&ring->tx_free_count, ring->tx_ring_size - 2); - ring->tx_next_free = &ring->tx_dma[0]; - ring->tx_last_free = &ring->tx_dma[ring->tx_ring_size - 2]; - ring->tx_thresh = max((unsigned long)ring->tx_ring_size >> 2, - MAX_SKB_FRAGS); - - ring->tx_map = mtk_qdma_tx_map; - ring->tx_poll = mtk_qdma_tx_poll; - ring->tx_clean = mtk_qdma_tx_clean; - - /* make sure that all changes to the dma ring are flushed before we - * continue - */ - wmb(); - - mtk_w32(eth, ring->tx_phys, MTK_QTX_CTX_PTR); - mtk_w32(eth, ring->tx_phys, MTK_QTX_DTX_PTR); - mtk_w32(eth, - ring->tx_phys + ((ring->tx_ring_size - 1) * sz), - MTK_QTX_CRX_PTR); - mtk_w32(eth, - ring->tx_phys + ((ring->tx_ring_size - 1) * sz), - MTK_QTX_DRX_PTR); - - return 0; - -no_tx_mem: - return -ENOMEM; -} - -static int mtk_qdma_init(struct mtk_eth *eth, int ring) -{ - int err; - - err = mtk_init_fq_dma(eth); - if (err) - return err; - - err = mtk_qdma_tx_alloc_tx(eth); - if (err) - return err; - - err = mtk_dma_rx_alloc(eth, ð->rx_ring[ring]); - if (err) - return err; - - mtk_w32(eth, eth->rx_ring[ring].rx_phys, MTK_QRX_BASE_PTR0); - mtk_w32(eth, eth->rx_ring[ring].rx_ring_size, MTK_QRX_MAX_CNT0); - mtk_w32(eth, eth->rx_ring[ring].rx_calc_idx, MTK_QRX_CRX_IDX0); - mtk_w32(eth, MTK_PST_DRX_IDX0, MTK_QDMA_RST_IDX); - mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES, MTK_QTX_CFG(0)); - - /* Enable random early drop and set drop threshold automatically */ - mtk_w32(eth, 0x174444, MTK_QDMA_FC_THRES); - mtk_w32(eth, 0x0, MTK_QDMA_HRED2); - - return 0; -} - -static int mtk_pdma_qdma_init(struct mtk_eth *eth) -{ - int err = mtk_qdma_init(eth, 1); - - if (err) - return err; - - err = mtk_dma_rx_alloc(eth, ð->rx_ring[0]); - if (err) - return err; - - mtk_reg_w32(eth, eth->rx_ring[0].rx_phys, MTK_REG_RX_BASE_PTR0); - mtk_reg_w32(eth, eth->rx_ring[0].rx_ring_size, MTK_REG_RX_MAX_CNT0); - mtk_reg_w32(eth, eth->rx_ring[0].rx_calc_idx, MTK_REG_RX_CALC_IDX0); - mtk_reg_w32(eth, MTK_PST_DRX_IDX0, MTK_REG_PDMA_RST_CFG); - - return 0; -} - -static int mtk_pdma_init(struct mtk_eth *eth) -{ - struct mtk_rx_ring *ring = ð->rx_ring[0]; - int err; - - err = mtk_pdma_tx_alloc(eth); - if (err) - return err; - - err = mtk_dma_rx_alloc(eth, ring); - if (err) - return err; - - mtk_reg_w32(eth, ring->rx_phys, MTK_REG_RX_BASE_PTR0); - mtk_reg_w32(eth, ring->rx_ring_size, MTK_REG_RX_MAX_CNT0); - mtk_reg_w32(eth, ring->rx_calc_idx, MTK_REG_RX_CALC_IDX0); - mtk_reg_w32(eth, MTK_PST_DRX_IDX0, MTK_REG_PDMA_RST_CFG); - - return 0; -} - -static void mtk_dma_free(struct mtk_eth *eth) -{ - int i; - - for (i = 0; i < eth->soc->mac_count; i++) - if (eth->netdev[i]) - netdev_reset_queue(eth->netdev[i]); - eth->tx_ring.tx_clean(eth); - mtk_clean_rx(eth, ð->rx_ring[0]); - mtk_clean_rx(eth, ð->rx_ring[1]); - kfree(eth->scratch_head); -} - -static void mtk_tx_timeout(struct net_device *dev) -{ - struct mtk_mac *mac = netdev_priv(dev); - struct mtk_eth *eth = mac->hw; - struct mtk_tx_ring *ring = ð->tx_ring; - - eth->netdev[mac->id]->stats.tx_errors++; - netif_err(eth, tx_err, dev, - "transmit timed out\n"); - if (eth->soc->dma_type & MTK_PDMA) { - netif_info(eth, drv, dev, "pdma_cfg:%08x\n", - mtk_reg_r32(eth, MTK_REG_PDMA_GLO_CFG)); - netif_info(eth, drv, dev, - "tx_ring=%d, base=%08x, max=%u, ctx=%u, dtx=%u, fdx=%hu, next=%hu\n", - 0, mtk_reg_r32(eth, MTK_REG_TX_BASE_PTR0), - mtk_reg_r32(eth, MTK_REG_TX_MAX_CNT0), - mtk_reg_r32(eth, MTK_REG_TX_CTX_IDX0), - mtk_reg_r32(eth, MTK_REG_TX_DTX_IDX0), - ring->tx_free_idx, - ring->tx_next_idx); - } - if (eth->soc->dma_type & MTK_QDMA) { - netif_info(eth, drv, dev, "qdma_cfg:%08x\n", - mtk_r32(eth, MTK_QDMA_GLO_CFG)); - netif_info(eth, drv, dev, - "tx_ring=%d, ctx=%08x, dtx=%08x, crx=%08x, drx=%08x, free=%hu\n", - 0, mtk_r32(eth, MTK_QTX_CTX_PTR), - mtk_r32(eth, MTK_QTX_DTX_PTR), - mtk_r32(eth, MTK_QTX_CRX_PTR), - mtk_r32(eth, MTK_QTX_DRX_PTR), - atomic_read(&ring->tx_free_count)); - } - netif_info(eth, drv, dev, - "rx_ring=%d, base=%08x, max=%u, calc=%u, drx=%u\n", - 0, mtk_reg_r32(eth, MTK_REG_RX_BASE_PTR0), - mtk_reg_r32(eth, MTK_REG_RX_MAX_CNT0), - mtk_reg_r32(eth, MTK_REG_RX_CALC_IDX0), - mtk_reg_r32(eth, MTK_REG_RX_DRX_IDX0)); - - schedule_work(&mac->pending_work); -} - -static irqreturn_t mtk_handle_irq(int irq, void *_eth) -{ - struct mtk_eth *eth = _eth; - u32 status, int_mask; - - status = mtk_irq_pending(eth); - if (unlikely(!status)) - return IRQ_NONE; - - int_mask = (eth->soc->rx_int | eth->soc->tx_int); - if (likely(status & int_mask)) { - if (likely(napi_schedule_prep(ð->rx_napi))) - __napi_schedule(ð->rx_napi); - } else { - mtk_irq_ack(eth, status); - } - mtk_irq_disable(eth, int_mask); - - return IRQ_HANDLED; -} - -#ifdef CONFIG_NET_POLL_CONTROLLER -static void mtk_poll_controller(struct net_device *dev) -{ - struct mtk_mac *mac = netdev_priv(dev); - struct mtk_eth *eth = mac->hw; - u32 int_mask = eth->soc->tx_int | eth->soc->rx_int; - - mtk_irq_disable(eth, int_mask); - mtk_handle_irq(dev->irq, dev); - mtk_irq_enable(eth, int_mask); -} -#endif - -int mtk_set_clock_cycle(struct mtk_eth *eth) -{ - unsigned long sysclk = eth->sysclk; - - sysclk /= MTK_US_CYC_CNT_DIVISOR; - sysclk <<= MTK_US_CYC_CNT_SHIFT; - - mtk_w32(eth, (mtk_r32(eth, MTK_GLO_CFG) & - ~(MTK_US_CYC_CNT_MASK << MTK_US_CYC_CNT_SHIFT)) | - sysclk, - MTK_GLO_CFG); - return 0; -} - -void mtk_fwd_config(struct mtk_eth *eth) -{ - u32 fwd_cfg; - - fwd_cfg = mtk_r32(eth, MTK_GDMA1_FWD_CFG); - - /* disable jumbo frame */ - if (eth->soc->jumbo_frame) - fwd_cfg &= ~MTK_GDM1_JMB_EN; - - /* set unicast/multicast/broadcast frame to cpu */ - fwd_cfg &= ~0xffff; - - mtk_w32(eth, fwd_cfg, MTK_GDMA1_FWD_CFG); -} - -void mtk_csum_config(struct mtk_eth *eth) -{ - if (eth->soc->hw_features & NETIF_F_RXCSUM) - mtk_w32(eth, mtk_r32(eth, MTK_GDMA1_FWD_CFG) | - (MTK_GDM1_ICS_EN | MTK_GDM1_TCS_EN | MTK_GDM1_UCS_EN), - MTK_GDMA1_FWD_CFG); - else - mtk_w32(eth, mtk_r32(eth, MTK_GDMA1_FWD_CFG) & - ~(MTK_GDM1_ICS_EN | MTK_GDM1_TCS_EN | MTK_GDM1_UCS_EN), - MTK_GDMA1_FWD_CFG); - if (eth->soc->hw_features & NETIF_F_IP_CSUM) - mtk_w32(eth, mtk_r32(eth, MTK_CDMA_CSG_CFG) | - (MTK_ICS_GEN_EN | MTK_TCS_GEN_EN | MTK_UCS_GEN_EN), - MTK_CDMA_CSG_CFG); - else - mtk_w32(eth, mtk_r32(eth, MTK_CDMA_CSG_CFG) & - ~(MTK_ICS_GEN_EN | MTK_TCS_GEN_EN | MTK_UCS_GEN_EN), - MTK_CDMA_CSG_CFG); -} - -static int mtk_start_dma(struct mtk_eth *eth) -{ - unsigned long flags; - u32 val; - int err; - - if (eth->soc->dma_type == MTK_PDMA) - err = mtk_pdma_init(eth); - else if (eth->soc->dma_type == MTK_QDMA) - err = mtk_qdma_init(eth, 0); - else - err = mtk_pdma_qdma_init(eth); - if (err) { - mtk_dma_free(eth); - return err; - } - - spin_lock_irqsave(ð->page_lock, flags); - - val = MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN; - if (eth->soc->rx_2b_offset) - val |= MTK_RX_2B_OFFSET; - val |= eth->soc->pdma_glo_cfg; - - if (eth->soc->dma_type & MTK_PDMA) - mtk_reg_w32(eth, val, MTK_REG_PDMA_GLO_CFG); - - if (eth->soc->dma_type & MTK_QDMA) - mtk_w32(eth, val, MTK_QDMA_GLO_CFG); - - spin_unlock_irqrestore(ð->page_lock, flags); - - return 0; -} - -static int mtk_open(struct net_device *dev) -{ - struct mtk_mac *mac = netdev_priv(dev); - struct mtk_eth *eth = mac->hw; - - dma_coerce_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32)); - - if (!atomic_read(ð->dma_refcnt)) { - int err = mtk_start_dma(eth); - - if (err) - return err; - - napi_enable(ð->rx_napi); - mtk_irq_enable(eth, eth->soc->tx_int | eth->soc->rx_int); - } - atomic_inc(ð->dma_refcnt); - - if (eth->phy) - eth->phy->start(mac); - - if (eth->soc->has_carrier && eth->soc->has_carrier(eth)) - netif_carrier_on(dev); - - netif_start_queue(dev); - eth->soc->fwd_config(eth); - - return 0; -} - -static void mtk_stop_dma(struct mtk_eth *eth, u32 glo_cfg) -{ - unsigned long flags; - u32 val; - int i; - - /* stop the dma enfine */ - spin_lock_irqsave(ð->page_lock, flags); - val = mtk_r32(eth, glo_cfg); - mtk_w32(eth, val & ~(MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN), - glo_cfg); - spin_unlock_irqrestore(ð->page_lock, flags); - - /* wait for dma stop */ - for (i = 0; i < 10; i++) { - val = mtk_r32(eth, glo_cfg); - if (val & (MTK_TX_DMA_BUSY | MTK_RX_DMA_BUSY)) { - msleep(20); - continue; - } - break; - } -} - -static int mtk_stop(struct net_device *dev) -{ - struct mtk_mac *mac = netdev_priv(dev); - struct mtk_eth *eth = mac->hw; - - netif_tx_disable(dev); - if (eth->phy) - eth->phy->stop(mac); - - if (!atomic_dec_and_test(ð->dma_refcnt)) - return 0; - - mtk_irq_disable(eth, eth->soc->tx_int | eth->soc->rx_int); - napi_disable(ð->rx_napi); - - if (eth->soc->dma_type & MTK_PDMA) - mtk_stop_dma(eth, mtk_reg_table[MTK_REG_PDMA_GLO_CFG]); - - if (eth->soc->dma_type & MTK_QDMA) - mtk_stop_dma(eth, MTK_QDMA_GLO_CFG); - - mtk_dma_free(eth); - - return 0; -} - -static int __init mtk_init_hw(struct mtk_eth *eth) -{ - int i, err; - - eth->soc->reset_fe(eth); - - if (eth->soc->switch_init) - if (eth->soc->switch_init(eth)) { - dev_err(eth->dev, "failed to initialize switch core\n"); - return -ENODEV; - } - - err = devm_request_irq(eth->dev, eth->irq, mtk_handle_irq, 0, - dev_name(eth->dev), eth); - if (err) - return err; - - err = mtk_mdio_init(eth); - if (err) - return err; - - /* disable delay and normal interrupt */ - mtk_reg_w32(eth, 0, MTK_REG_DLY_INT_CFG); - if (eth->soc->dma_type & MTK_QDMA) - mtk_w32(eth, 0, MTK_QDMA_DELAY_INT); - mtk_irq_disable(eth, eth->soc->tx_int | eth->soc->rx_int); - - /* frame engine will push VLAN tag regarding to VIDX field in Tx desc */ - if (mtk_reg_table[MTK_REG_MTK_DMA_VID_BASE]) - for (i = 0; i < 16; i += 2) - mtk_w32(eth, ((i + 1) << 16) + i, - mtk_reg_table[MTK_REG_MTK_DMA_VID_BASE] + - (i * 2)); - - if (eth->soc->fwd_config(eth)) - dev_err(eth->dev, "unable to get clock\n"); - - if (mtk_reg_table[MTK_REG_MTK_RST_GL]) { - mtk_reg_w32(eth, 1, MTK_REG_MTK_RST_GL); - mtk_reg_w32(eth, 0, MTK_REG_MTK_RST_GL); - } - - return 0; -} - -static int __init mtk_init(struct net_device *dev) -{ - struct mtk_mac *mac = netdev_priv(dev); - struct mtk_eth *eth = mac->hw; - struct device_node *port; - const char *mac_addr; - int err; - - mac_addr = of_get_mac_address(mac->of_node); - if (mac_addr) - ether_addr_copy(dev->dev_addr, mac_addr); - - /* If the mac address is invalid, use random mac address */ - if (!is_valid_ether_addr(dev->dev_addr)) { - eth_hw_addr_random(dev); - dev_err(eth->dev, "generated random MAC address %pM\n", - dev->dev_addr); - } - mac->hw->soc->set_mac(mac, dev->dev_addr); - - if (eth->soc->port_init) - for_each_child_of_node(mac->of_node, port) - if (of_device_is_compatible(port, - "mediatek,eth-port") && - of_device_is_available(port)) - eth->soc->port_init(eth, mac, port); - - if (eth->phy) { - err = eth->phy->connect(mac); - if (err) - return err; - } - - return 0; -} - -static void mtk_uninit(struct net_device *dev) -{ - struct mtk_mac *mac = netdev_priv(dev); - struct mtk_eth *eth = mac->hw; - - if (eth->phy) - eth->phy->disconnect(mac); - mtk_mdio_cleanup(eth); - - mtk_irq_disable(eth, ~0); - free_irq(dev->irq, dev); -} - -static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) -{ - struct mtk_mac *mac = netdev_priv(dev); - - if (!mac->phy_dev) - return -ENODEV; - - switch (cmd) { - case SIOCGMIIPHY: - case SIOCGMIIREG: - case SIOCSMIIREG: - return phy_mii_ioctl(mac->phy_dev, ifr, cmd); - default: - break; - } - - return -EOPNOTSUPP; -} - -static int mtk_change_mtu(struct net_device *dev, int new_mtu) -{ - struct mtk_mac *mac = netdev_priv(dev); - struct mtk_eth *eth = mac->hw; - int frag_size, old_mtu; - u32 fwd_cfg; - - if (!eth->soc->jumbo_frame) - return eth_change_mtu(dev, new_mtu); - - frag_size = mtk_max_frag_size(new_mtu); - if (new_mtu < 68 || frag_size > PAGE_SIZE) - return -EINVAL; - - old_mtu = dev->mtu; - dev->mtu = new_mtu; - - /* return early if the buffer sizes will not change */ - if (old_mtu <= ETH_DATA_LEN && new_mtu <= ETH_DATA_LEN) - return 0; - if (old_mtu > ETH_DATA_LEN && new_mtu > ETH_DATA_LEN) - return 0; - - if (new_mtu <= ETH_DATA_LEN) - eth->rx_ring[0].frag_size = mtk_max_frag_size(ETH_DATA_LEN); - else - eth->rx_ring[0].frag_size = PAGE_SIZE; - eth->rx_ring[0].rx_buf_size = - mtk_max_buf_size(eth->rx_ring[0].frag_size); - - if (!netif_running(dev)) - return 0; - - mtk_stop(dev); - fwd_cfg = mtk_r32(eth, MTK_GDMA1_FWD_CFG); - if (new_mtu <= ETH_DATA_LEN) { - fwd_cfg &= ~MTK_GDM1_JMB_EN; - } else { - fwd_cfg &= ~(MTK_GDM1_JMB_LEN_MASK << MTK_GDM1_JMB_LEN_SHIFT); - fwd_cfg |= (DIV_ROUND_UP(frag_size, 1024) << - MTK_GDM1_JMB_LEN_SHIFT) | MTK_GDM1_JMB_EN; - } - mtk_w32(eth, fwd_cfg, MTK_GDMA1_FWD_CFG); - - return mtk_open(dev); -} - -static void mtk_pending_work(struct work_struct *work) -{ - struct mtk_mac *mac = container_of(work, struct mtk_mac, pending_work); - struct mtk_eth *eth = mac->hw; - struct net_device *dev = eth->netdev[mac->id]; - int err; - - rtnl_lock(); - mtk_stop(dev); - - err = mtk_open(dev); - if (err) { - netif_alert(eth, ifup, dev, - "Driver up/down cycle failed, closing device.\n"); - dev_close(dev); - } - rtnl_unlock(); -} - -static int mtk_cleanup(struct mtk_eth *eth) -{ - int i; - - for (i = 0; i < eth->soc->mac_count; i++) { - struct mtk_mac *mac = netdev_priv(eth->netdev[i]); - - if (!eth->netdev[i]) - continue; - - unregister_netdev(eth->netdev[i]); - free_netdev(eth->netdev[i]); - cancel_work_sync(&mac->pending_work); - } - - return 0; -} - -static const struct net_device_ops mtk_netdev_ops = { - .ndo_init = mtk_init, - .ndo_uninit = mtk_uninit, - .ndo_open = mtk_open, - .ndo_stop = mtk_stop, - .ndo_start_xmit = mtk_start_xmit, - .ndo_set_mac_address = mtk_set_mac_address, - .ndo_validate_addr = eth_validate_addr, - .ndo_do_ioctl = mtk_do_ioctl, - .ndo_change_mtu = mtk_change_mtu, - .ndo_tx_timeout = mtk_tx_timeout, - .ndo_get_stats64 = mtk_get_stats64, - .ndo_vlan_rx_add_vid = mtk_vlan_rx_add_vid, - .ndo_vlan_rx_kill_vid = mtk_vlan_rx_kill_vid, -#ifdef CONFIG_NET_POLL_CONTROLLER - .ndo_poll_controller = mtk_poll_controller, -#endif -}; - -static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) -{ - struct mtk_mac *mac; - const __be32 *_id = of_get_property(np, "reg", NULL); - int id, err; - - if (!_id) { - dev_err(eth->dev, "missing mac id\n"); - return -EINVAL; - } - id = be32_to_cpup(_id); - if (id >= eth->soc->mac_count || eth->netdev[id]) { - dev_err(eth->dev, "%d is not a valid mac id\n", id); - return -EINVAL; - } - - eth->netdev[id] = alloc_etherdev(sizeof(*mac)); - if (!eth->netdev[id]) { - dev_err(eth->dev, "alloc_etherdev failed\n"); - return -ENOMEM; - } - mac = netdev_priv(eth->netdev[id]); - eth->mac[id] = mac; - mac->id = id; - mac->hw = eth; - mac->of_node = np; - INIT_WORK(&mac->pending_work, mtk_pending_work); - - if (mtk_reg_table[MTK_REG_MTK_COUNTER_BASE]) { - mac->hw_stats = devm_kzalloc(eth->dev, - sizeof(*mac->hw_stats), - GFP_KERNEL); - if (!mac->hw_stats) { - err = -ENOMEM; - goto free_netdev; - } - spin_lock_init(&mac->hw_stats->stats_lock); - mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET; - } - - SET_NETDEV_DEV(eth->netdev[id], eth->dev); - eth->netdev[id]->netdev_ops = &mtk_netdev_ops; - eth->netdev[id]->base_addr = (unsigned long)eth->base; - - if (eth->soc->init_data) - eth->soc->init_data(eth->soc, eth->netdev[id]); - - eth->netdev[id]->vlan_features = eth->soc->hw_features & - ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX); - eth->netdev[id]->features |= eth->soc->hw_features; - - if (mtk_reg_table[MTK_REG_MTK_DMA_VID_BASE]) - eth->netdev[id]->features |= NETIF_F_HW_VLAN_CTAG_FILTER; - - mtk_set_ethtool_ops(eth->netdev[id]); - - err = register_netdev(eth->netdev[id]); - if (err) { - dev_err(eth->dev, "error bringing up device\n"); - err = -ENOMEM; - goto free_netdev; - } - eth->netdev[id]->irq = eth->irq; - netif_info(eth, probe, eth->netdev[id], - "mediatek frame engine at 0x%08lx, irq %d\n", - eth->netdev[id]->base_addr, eth->netdev[id]->irq); - - return 0; - -free_netdev: - free_netdev(eth->netdev[id]); - return err; -} - -static int mtk_probe(struct platform_device *pdev) -{ - struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - const struct of_device_id *match; - struct device_node *mac_np; - struct mtk_soc_data *soc; - struct mtk_eth *eth; - struct clk *sysclk; - int err; - - device_reset(&pdev->dev); - - match = of_match_device(of_mtk_match, &pdev->dev); - soc = (struct mtk_soc_data *)match->data; - - if (soc->reg_table) - mtk_reg_table = soc->reg_table; - - eth = devm_kzalloc(&pdev->dev, sizeof(*eth), GFP_KERNEL); - if (!eth) - return -ENOMEM; - - eth->base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(eth->base)) - return PTR_ERR(eth->base); - - spin_lock_init(ð->page_lock); - - eth->ethsys = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, - "mediatek,ethsys"); - if (IS_ERR(eth->ethsys)) - return PTR_ERR(eth->ethsys); - - eth->irq = platform_get_irq(pdev, 0); - if (eth->irq < 0) { - dev_err(&pdev->dev, "no IRQ resource found\n"); - return -ENXIO; - } - - sysclk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(sysclk)) { - dev_err(&pdev->dev, - "the clock is not defined in the devicetree\n"); - return -ENXIO; - } - eth->sysclk = clk_get_rate(sysclk); - - eth->switch_np = of_parse_phandle(pdev->dev.of_node, - "mediatek,switch", 0); - if (soc->has_switch && !eth->switch_np) { - dev_err(&pdev->dev, "failed to read switch phandle\n"); - return -ENODEV; - } - - eth->dev = &pdev->dev; - eth->soc = soc; - eth->msg_enable = netif_msg_init(mtk_msg_level, MTK_DEFAULT_MSG_ENABLE); - - err = mtk_init_hw(eth); - if (err) - return err; - - if (eth->soc->mac_count > 1) { - for_each_child_of_node(pdev->dev.of_node, mac_np) { - if (!of_device_is_compatible(mac_np, - "mediatek,eth-mac")) - continue; - - if (!of_device_is_available(mac_np)) - continue; - - err = mtk_add_mac(eth, mac_np); - if (err) - goto err_free_dev; - } - - init_dummy_netdev(ð->dummy_dev); - netif_napi_add(ð->dummy_dev, ð->rx_napi, mtk_poll, - soc->napi_weight); - } else { - err = mtk_add_mac(eth, pdev->dev.of_node); - if (err) - goto err_free_dev; - netif_napi_add(eth->netdev[0], ð->rx_napi, mtk_poll, - soc->napi_weight); - } - - platform_set_drvdata(pdev, eth); - - return 0; - -err_free_dev: - mtk_cleanup(eth); - return err; -} - -static int mtk_remove(struct platform_device *pdev) -{ - struct mtk_eth *eth = platform_get_drvdata(pdev); - - netif_napi_del(ð->rx_napi); - mtk_cleanup(eth); - platform_set_drvdata(pdev, NULL); - - return 0; -} - -static struct platform_driver mtk_driver = { - .probe = mtk_probe, - .remove = mtk_remove, - .driver = { - .name = "mtk_soc_eth", - .of_match_table = of_mtk_match, - }, -}; - -module_platform_driver(mtk_driver); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("John Crispin "); -MODULE_DESCRIPTION("Ethernet driver for MediaTek SoC"); diff --git a/drivers/staging/mt7621-eth/mtk_eth_soc.h b/drivers/staging/mt7621-eth/mtk_eth_soc.h deleted file mode 100644 index e6ed80433f49..000000000000 --- a/drivers/staging/mt7621-eth/mtk_eth_soc.h +++ /dev/null @@ -1,716 +0,0 @@ -/* This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Copyright (C) 2009-2016 John Crispin - * Copyright (C) 2009-2016 Felix Fietkau - * Copyright (C) 2013-2016 Michael Lee - */ - -#ifndef MTK_ETH_H -#define MTK_ETH_H - -#include -#include -#include -#include -#include -#include -#include -#include - -/* these registers have different offsets depending on the SoC. we use a lookup - * table for these - */ -enum mtk_reg { - MTK_REG_PDMA_GLO_CFG = 0, - MTK_REG_PDMA_RST_CFG, - MTK_REG_DLY_INT_CFG, - MTK_REG_TX_BASE_PTR0, - MTK_REG_TX_MAX_CNT0, - MTK_REG_TX_CTX_IDX0, - MTK_REG_TX_DTX_IDX0, - MTK_REG_RX_BASE_PTR0, - MTK_REG_RX_MAX_CNT0, - MTK_REG_RX_CALC_IDX0, - MTK_REG_RX_DRX_IDX0, - MTK_REG_MTK_INT_ENABLE, - MTK_REG_MTK_INT_STATUS, - MTK_REG_MTK_DMA_VID_BASE, - MTK_REG_MTK_COUNTER_BASE, - MTK_REG_MTK_RST_GL, - MTK_REG_MTK_INT_STATUS2, - MTK_REG_COUNT -}; - -/* delayed interrupt bits */ -#define MTK_DELAY_EN_INT 0x80 -#define MTK_DELAY_MAX_INT 0x04 -#define MTK_DELAY_MAX_TOUT 0x04 -#define MTK_DELAY_TIME 20 -#define MTK_DELAY_CHAN (((MTK_DELAY_EN_INT | MTK_DELAY_MAX_INT) << 8) \ - | MTK_DELAY_MAX_TOUT) -#define MTK_DELAY_INIT ((MTK_DELAY_CHAN << 16) | MTK_DELAY_CHAN) -#define MTK_PSE_FQFC_CFG_INIT 0x80504000 -#define MTK_PSE_FQFC_CFG_256Q 0xff908000 - -/* interrupt bits */ -#define MTK_CNT_PPE_AF BIT(31) -#define MTK_CNT_GDM_AF BIT(29) -#define MTK_PSE_P2_FC BIT(26) -#define MTK_PSE_BUF_DROP BIT(24) -#define MTK_GDM_OTHER_DROP BIT(23) -#define MTK_PSE_P1_FC BIT(22) -#define MTK_PSE_P0_FC BIT(21) -#define MTK_PSE_FQ_EMPTY BIT(20) -#define MTK_GE1_STA_CHG BIT(18) -#define MTK_TX_COHERENT BIT(17) -#define MTK_RX_COHERENT BIT(16) -#define MTK_TX_DONE_INT3 BIT(11) -#define MTK_TX_DONE_INT2 BIT(10) -#define MTK_TX_DONE_INT1 BIT(9) -#define MTK_TX_DONE_INT0 BIT(8) -#define MTK_RX_DONE_INT0 BIT(2) -#define MTK_TX_DLY_INT BIT(1) -#define MTK_RX_DLY_INT BIT(0) - -#define MTK_RX_DONE_INT MTK_RX_DONE_INT0 -#define MTK_TX_DONE_INT (MTK_TX_DONE_INT0 | MTK_TX_DONE_INT1 | \ - MTK_TX_DONE_INT2 | MTK_TX_DONE_INT3) - -#define RT5350_RX_DLY_INT BIT(30) -#define RT5350_TX_DLY_INT BIT(28) -#define RT5350_RX_DONE_INT1 BIT(17) -#define RT5350_RX_DONE_INT0 BIT(16) -#define RT5350_TX_DONE_INT3 BIT(3) -#define RT5350_TX_DONE_INT2 BIT(2) -#define RT5350_TX_DONE_INT1 BIT(1) -#define RT5350_TX_DONE_INT0 BIT(0) - -#define RT5350_RX_DONE_INT (RT5350_RX_DONE_INT0 | RT5350_RX_DONE_INT1) -#define RT5350_TX_DONE_INT (RT5350_TX_DONE_INT0 | RT5350_TX_DONE_INT1 | \ - RT5350_TX_DONE_INT2 | RT5350_TX_DONE_INT3) - -/* registers */ -#define MTK_GDMA_OFFSET 0x0020 -#define MTK_PSE_OFFSET 0x0040 -#define MTK_GDMA2_OFFSET 0x0060 -#define MTK_CDMA_OFFSET 0x0080 -#define MTK_DMA_VID0 0x00a8 -#define MTK_PDMA_OFFSET 0x0100 -#define MTK_PPE_OFFSET 0x0200 -#define MTK_CMTABLE_OFFSET 0x0400 -#define MTK_POLICYTABLE_OFFSET 0x1000 - -#define MT7621_GDMA_OFFSET 0x0500 -#define MT7620_GDMA_OFFSET 0x0600 - -#define RT5350_PDMA_OFFSET 0x0800 -#define RT5350_SDM_OFFSET 0x0c00 - -#define MTK_MDIO_ACCESS 0x00 -#define MTK_MDIO_CFG 0x04 -#define MTK_GLO_CFG 0x08 -#define MTK_RST_GL 0x0C -#define MTK_INT_STATUS 0x10 -#define MTK_INT_ENABLE 0x14 -#define MTK_MDIO_CFG2 0x18 -#define MTK_FOC_TS_T 0x1C - -#define MTK_GDMA1_FWD_CFG (MTK_GDMA_OFFSET + 0x00) -#define MTK_GDMA1_SCH_CFG (MTK_GDMA_OFFSET + 0x04) -#define MTK_GDMA1_SHPR_CFG (MTK_GDMA_OFFSET + 0x08) -#define MTK_GDMA1_MAC_ADRL (MTK_GDMA_OFFSET + 0x0C) -#define MTK_GDMA1_MAC_ADRH (MTK_GDMA_OFFSET + 0x10) - -#define MTK_GDMA2_FWD_CFG (MTK_GDMA2_OFFSET + 0x00) -#define MTK_GDMA2_SCH_CFG (MTK_GDMA2_OFFSET + 0x04) -#define MTK_GDMA2_SHPR_CFG (MTK_GDMA2_OFFSET + 0x08) -#define MTK_GDMA2_MAC_ADRL (MTK_GDMA2_OFFSET + 0x0C) -#define MTK_GDMA2_MAC_ADRH (MTK_GDMA2_OFFSET + 0x10) - -#define MTK_PSE_FQ_CFG (MTK_PSE_OFFSET + 0x00) -#define MTK_CDMA_FC_CFG (MTK_PSE_OFFSET + 0x04) -#define MTK_GDMA1_FC_CFG (MTK_PSE_OFFSET + 0x08) -#define MTK_GDMA2_FC_CFG (MTK_PSE_OFFSET + 0x0C) - -#define MTK_CDMA_CSG_CFG (MTK_CDMA_OFFSET + 0x00) -#define MTK_CDMA_SCH_CFG (MTK_CDMA_OFFSET + 0x04) - -#define MT7621_GDMA_FWD_CFG(x) (MT7621_GDMA_OFFSET + (x * 0x1000)) - -/* FIXME this might be different for different SOCs */ -#define MT7620_GDMA1_FWD_CFG (MT7621_GDMA_OFFSET + 0x00) - -#define RT5350_TX_BASE_PTR0 (RT5350_PDMA_OFFSET + 0x00) -#define RT5350_TX_MAX_CNT0 (RT5350_PDMA_OFFSET + 0x04) -#define RT5350_TX_CTX_IDX0 (RT5350_PDMA_OFFSET + 0x08) -#define RT5350_TX_DTX_IDX0 (RT5350_PDMA_OFFSET + 0x0C) -#define RT5350_TX_BASE_PTR1 (RT5350_PDMA_OFFSET + 0x10) -#define RT5350_TX_MAX_CNT1 (RT5350_PDMA_OFFSET + 0x14) -#define RT5350_TX_CTX_IDX1 (RT5350_PDMA_OFFSET + 0x18) -#define RT5350_TX_DTX_IDX1 (RT5350_PDMA_OFFSET + 0x1C) -#define RT5350_TX_BASE_PTR2 (RT5350_PDMA_OFFSET + 0x20) -#define RT5350_TX_MAX_CNT2 (RT5350_PDMA_OFFSET + 0x24) -#define RT5350_TX_CTX_IDX2 (RT5350_PDMA_OFFSET + 0x28) -#define RT5350_TX_DTX_IDX2 (RT5350_PDMA_OFFSET + 0x2C) -#define RT5350_TX_BASE_PTR3 (RT5350_PDMA_OFFSET + 0x30) -#define RT5350_TX_MAX_CNT3 (RT5350_PDMA_OFFSET + 0x34) -#define RT5350_TX_CTX_IDX3 (RT5350_PDMA_OFFSET + 0x38) -#define RT5350_TX_DTX_IDX3 (RT5350_PDMA_OFFSET + 0x3C) -#define RT5350_RX_BASE_PTR0 (RT5350_PDMA_OFFSET + 0x100) -#define RT5350_RX_MAX_CNT0 (RT5350_PDMA_OFFSET + 0x104) -#define RT5350_RX_CALC_IDX0 (RT5350_PDMA_OFFSET + 0x108) -#define RT5350_RX_DRX_IDX0 (RT5350_PDMA_OFFSET + 0x10C) -#define RT5350_RX_BASE_PTR1 (RT5350_PDMA_OFFSET + 0x110) -#define RT5350_RX_MAX_CNT1 (RT5350_PDMA_OFFSET + 0x114) -#define RT5350_RX_CALC_IDX1 (RT5350_PDMA_OFFSET + 0x118) -#define RT5350_RX_DRX_IDX1 (RT5350_PDMA_OFFSET + 0x11C) -#define RT5350_PDMA_GLO_CFG (RT5350_PDMA_OFFSET + 0x204) -#define RT5350_PDMA_RST_CFG (RT5350_PDMA_OFFSET + 0x208) -#define RT5350_DLY_INT_CFG (RT5350_PDMA_OFFSET + 0x20c) -#define RT5350_MTK_INT_STATUS (RT5350_PDMA_OFFSET + 0x220) -#define RT5350_MTK_INT_ENABLE (RT5350_PDMA_OFFSET + 0x228) -#define RT5350_PDMA_SCH_CFG (RT5350_PDMA_OFFSET + 0x280) - -#define MTK_PDMA_GLO_CFG (MTK_PDMA_OFFSET + 0x00) -#define MTK_PDMA_RST_CFG (MTK_PDMA_OFFSET + 0x04) -#define MTK_PDMA_SCH_CFG (MTK_PDMA_OFFSET + 0x08) -#define MTK_DLY_INT_CFG (MTK_PDMA_OFFSET + 0x0C) -#define MTK_TX_BASE_PTR0 (MTK_PDMA_OFFSET + 0x10) -#define MTK_TX_MAX_CNT0 (MTK_PDMA_OFFSET + 0x14) -#define MTK_TX_CTX_IDX0 (MTK_PDMA_OFFSET + 0x18) -#define MTK_TX_DTX_IDX0 (MTK_PDMA_OFFSET + 0x1C) -#define MTK_TX_BASE_PTR1 (MTK_PDMA_OFFSET + 0x20) -#define MTK_TX_MAX_CNT1 (MTK_PDMA_OFFSET + 0x24) -#define MTK_TX_CTX_IDX1 (MTK_PDMA_OFFSET + 0x28) -#define MTK_TX_DTX_IDX1 (MTK_PDMA_OFFSET + 0x2C) -#define MTK_RX_BASE_PTR0 (MTK_PDMA_OFFSET + 0x30) -#define MTK_RX_MAX_CNT0 (MTK_PDMA_OFFSET + 0x34) -#define MTK_RX_CALC_IDX0 (MTK_PDMA_OFFSET + 0x38) -#define MTK_RX_DRX_IDX0 (MTK_PDMA_OFFSET + 0x3C) -#define MTK_TX_BASE_PTR2 (MTK_PDMA_OFFSET + 0x40) -#define MTK_TX_MAX_CNT2 (MTK_PDMA_OFFSET + 0x44) -#define MTK_TX_CTX_IDX2 (MTK_PDMA_OFFSET + 0x48) -#define MTK_TX_DTX_IDX2 (MTK_PDMA_OFFSET + 0x4C) -#define MTK_TX_BASE_PTR3 (MTK_PDMA_OFFSET + 0x50) -#define MTK_TX_MAX_CNT3 (MTK_PDMA_OFFSET + 0x54) -#define MTK_TX_CTX_IDX3 (MTK_PDMA_OFFSET + 0x58) -#define MTK_TX_DTX_IDX3 (MTK_PDMA_OFFSET + 0x5C) -#define MTK_RX_BASE_PTR1 (MTK_PDMA_OFFSET + 0x60) -#define MTK_RX_MAX_CNT1 (MTK_PDMA_OFFSET + 0x64) -#define MTK_RX_CALC_IDX1 (MTK_PDMA_OFFSET + 0x68) -#define MTK_RX_DRX_IDX1 (MTK_PDMA_OFFSET + 0x6C) - -/* Switch DMA configuration */ -#define RT5350_SDM_CFG (RT5350_SDM_OFFSET + 0x00) -#define RT5350_SDM_RRING (RT5350_SDM_OFFSET + 0x04) -#define RT5350_SDM_TRING (RT5350_SDM_OFFSET + 0x08) -#define RT5350_SDM_MAC_ADRL (RT5350_SDM_OFFSET + 0x0C) -#define RT5350_SDM_MAC_ADRH (RT5350_SDM_OFFSET + 0x10) -#define RT5350_SDM_TPCNT (RT5350_SDM_OFFSET + 0x100) -#define RT5350_SDM_TBCNT (RT5350_SDM_OFFSET + 0x104) -#define RT5350_SDM_RPCNT (RT5350_SDM_OFFSET + 0x108) -#define RT5350_SDM_RBCNT (RT5350_SDM_OFFSET + 0x10C) -#define RT5350_SDM_CS_ERR (RT5350_SDM_OFFSET + 0x110) - -#define RT5350_SDM_ICS_EN BIT(16) -#define RT5350_SDM_TCS_EN BIT(17) -#define RT5350_SDM_UCS_EN BIT(18) - -/* QDMA registers */ -#define MTK_QTX_CFG(x) (0x1800 + (x * 0x10)) -#define MTK_QTX_SCH(x) (0x1804 + (x * 0x10)) -#define MTK_QRX_BASE_PTR0 0x1900 -#define MTK_QRX_MAX_CNT0 0x1904 -#define MTK_QRX_CRX_IDX0 0x1908 -#define MTK_QRX_DRX_IDX0 0x190C -#define MTK_QDMA_GLO_CFG 0x1A04 -#define MTK_QDMA_RST_IDX 0x1A08 -#define MTK_QDMA_DELAY_INT 0x1A0C -#define MTK_QDMA_FC_THRES 0x1A10 -#define MTK_QMTK_INT_STATUS 0x1A18 -#define MTK_QMTK_INT_ENABLE 0x1A1C -#define MTK_QDMA_HRED2 0x1A44 - -#define MTK_QTX_CTX_PTR 0x1B00 -#define MTK_QTX_DTX_PTR 0x1B04 - -#define MTK_QTX_CRX_PTR 0x1B10 -#define MTK_QTX_DRX_PTR 0x1B14 - -#define MTK_QDMA_FQ_HEAD 0x1B20 -#define MTK_QDMA_FQ_TAIL 0x1B24 -#define MTK_QDMA_FQ_CNT 0x1B28 -#define MTK_QDMA_FQ_BLEN 0x1B2C - -#define QDMA_PAGE_SIZE 2048 -#define QDMA_TX_OWNER_CPU BIT(31) -#define QDMA_TX_SWC BIT(14) -#define TX_QDMA_SDL(_x) (((_x) & 0x3fff) << 16) -#define QDMA_RES_THRES 4 - -/* MDIO_CFG register bits */ -#define MTK_MDIO_CFG_AUTO_POLL_EN BIT(29) -#define MTK_MDIO_CFG_GP1_BP_EN BIT(16) -#define MTK_MDIO_CFG_GP1_FRC_EN BIT(15) -#define MTK_MDIO_CFG_GP1_SPEED_10 (0 << 13) -#define MTK_MDIO_CFG_GP1_SPEED_100 (1 << 13) -#define MTK_MDIO_CFG_GP1_SPEED_1000 (2 << 13) -#define MTK_MDIO_CFG_GP1_DUPLEX BIT(12) -#define MTK_MDIO_CFG_GP1_FC_TX BIT(11) -#define MTK_MDIO_CFG_GP1_FC_RX BIT(10) -#define MTK_MDIO_CFG_GP1_LNK_DWN BIT(9) -#define MTK_MDIO_CFG_GP1_AN_FAIL BIT(8) -#define MTK_MDIO_CFG_MDC_CLK_DIV_1 (0 << 6) -#define MTK_MDIO_CFG_MDC_CLK_DIV_2 (1 << 6) -#define MTK_MDIO_CFG_MDC_CLK_DIV_4 (2 << 6) -#define MTK_MDIO_CFG_MDC_CLK_DIV_8 (3 << 6) -#define MTK_MDIO_CFG_TURBO_MII_FREQ BIT(5) -#define MTK_MDIO_CFG_TURBO_MII_MODE BIT(4) -#define MTK_MDIO_CFG_RX_CLK_SKEW_0 (0 << 2) -#define MTK_MDIO_CFG_RX_CLK_SKEW_200 (1 << 2) -#define MTK_MDIO_CFG_RX_CLK_SKEW_400 (2 << 2) -#define MTK_MDIO_CFG_RX_CLK_SKEW_INV (3 << 2) -#define MTK_MDIO_CFG_TX_CLK_SKEW_0 0 -#define MTK_MDIO_CFG_TX_CLK_SKEW_200 1 -#define MTK_MDIO_CFG_TX_CLK_SKEW_400 2 -#define MTK_MDIO_CFG_TX_CLK_SKEW_INV 3 - -/* uni-cast port */ -#define MTK_GDM1_JMB_LEN_MASK 0xf -#define MTK_GDM1_JMB_LEN_SHIFT 28 -#define MTK_GDM1_ICS_EN BIT(22) -#define MTK_GDM1_TCS_EN BIT(21) -#define MTK_GDM1_UCS_EN BIT(20) -#define MTK_GDM1_JMB_EN BIT(19) -#define MTK_GDM1_STRPCRC BIT(16) -#define MTK_GDM1_UFRC_P_CPU (0 << 12) -#define MTK_GDM1_UFRC_P_GDMA1 (1 << 12) -#define MTK_GDM1_UFRC_P_PPE (6 << 12) - -/* checksums */ -#define MTK_ICS_GEN_EN BIT(2) -#define MTK_UCS_GEN_EN BIT(1) -#define MTK_TCS_GEN_EN BIT(0) - -/* dma mode */ -#define MTK_PDMA BIT(0) -#define MTK_QDMA BIT(1) -#define MTK_PDMA_RX_QDMA_TX (MTK_PDMA | MTK_QDMA) - -/* dma ring */ -#define MTK_PST_DRX_IDX0 BIT(16) -#define MTK_PST_DTX_IDX3 BIT(3) -#define MTK_PST_DTX_IDX2 BIT(2) -#define MTK_PST_DTX_IDX1 BIT(1) -#define MTK_PST_DTX_IDX0 BIT(0) - -#define MTK_RX_2B_OFFSET BIT(31) -#define MTK_TX_WB_DDONE BIT(6) -#define MTK_RX_DMA_BUSY BIT(3) -#define MTK_TX_DMA_BUSY BIT(1) -#define MTK_RX_DMA_EN BIT(2) -#define MTK_TX_DMA_EN BIT(0) - -#define MTK_PDMA_SIZE_4DWORDS (0 << 4) -#define MTK_PDMA_SIZE_8DWORDS (1 << 4) -#define MTK_PDMA_SIZE_16DWORDS (2 << 4) - -#define MTK_US_CYC_CNT_MASK 0xff -#define MTK_US_CYC_CNT_SHIFT 0x8 -#define MTK_US_CYC_CNT_DIVISOR 1000000 - -/* PDMA descriptor rxd2 */ -#define RX_DMA_DONE BIT(31) -#define RX_DMA_LSO BIT(30) -#define RX_DMA_PLEN0(_x) (((_x) & 0x3fff) << 16) -#define RX_DMA_GET_PLEN0(_x) (((_x) >> 16) & 0x3fff) -#define RX_DMA_TAG BIT(15) - -/* PDMA descriptor rxd3 */ -#define RX_DMA_TPID(_x) (((_x) >> 16) & 0xffff) -#define RX_DMA_VID(_x) ((_x) & 0xfff) - -/* PDMA descriptor rxd4 */ -#define RX_DMA_L4VALID BIT(30) -#define RX_DMA_FPORT_SHIFT 19 -#define RX_DMA_FPORT_MASK 0x7 - -struct mtk_rx_dma { - unsigned int rxd1; - unsigned int rxd2; - unsigned int rxd3; - unsigned int rxd4; -} __packed __aligned(4); - -/* PDMA tx descriptor bits */ -#define TX_DMA_BUF_LEN 0x3fff -#define TX_DMA_PLEN0_MASK (TX_DMA_BUF_LEN << 16) -#define TX_DMA_PLEN0(_x) (((_x) & TX_DMA_BUF_LEN) << 16) -#define TX_DMA_PLEN1(_x) ((_x) & TX_DMA_BUF_LEN) -#define TX_DMA_GET_PLEN0(_x) (((_x) >> 16) & TX_DMA_BUF_LEN) -#define TX_DMA_GET_PLEN1(_x) ((_x) & TX_DMA_BUF_LEN) -#define TX_DMA_LS1 BIT(14) -#define TX_DMA_LS0 BIT(30) -#define TX_DMA_DONE BIT(31) -#define TX_DMA_FPORT_SHIFT 25 -#define TX_DMA_FPORT_MASK 0x7 -#define TX_DMA_INS_VLAN_MT7621 BIT(16) -#define TX_DMA_INS_VLAN BIT(7) -#define TX_DMA_INS_PPPOE BIT(12) -#define TX_DMA_TAG BIT(15) -#define TX_DMA_TAG_MASK BIT(15) -#define TX_DMA_QN(_x) ((_x) << 16) -#define TX_DMA_PN(_x) ((_x) << 24) -#define TX_DMA_QN_MASK TX_DMA_QN(0x7) -#define TX_DMA_PN_MASK TX_DMA_PN(0x7) -#define TX_DMA_UDF BIT(20) -#define TX_DMA_CHKSUM (0x7 << 29) -#define TX_DMA_TSO BIT(28) -#define TX_DMA_DESP4_DEF (TX_DMA_QN(3) | TX_DMA_PN(1)) - -/* frame engine counters */ -#define MTK_PPE_AC_BCNT0 (MTK_CMTABLE_OFFSET + 0x00) -#define MTK_GDMA1_TX_GBCNT (MTK_CMTABLE_OFFSET + 0x300) -#define MTK_GDMA2_TX_GBCNT (MTK_GDMA1_TX_GBCNT + 0x40) - -/* phy device flags */ -#define MTK_PHY_FLAG_PORT BIT(0) -#define MTK_PHY_FLAG_ATTACH BIT(1) - -struct mtk_tx_dma { - unsigned int txd1; - unsigned int txd2; - unsigned int txd3; - unsigned int txd4; -} __packed __aligned(4); - -struct mtk_eth; -struct mtk_mac; - -/* manage the attached phys */ -struct mtk_phy { - spinlock_t lock; - - struct phy_device *phy[8]; - struct device_node *phy_node[8]; - const __be32 *phy_fixed[8]; - int duplex[8]; - int speed[8]; - int tx_fc[8]; - int rx_fc[8]; - int (*connect)(struct mtk_mac *mac); - void (*disconnect)(struct mtk_mac *mac); - void (*start)(struct mtk_mac *mac); - void (*stop)(struct mtk_mac *mac); -}; - -/* struct mtk_soc_data - the structure that holds the SoC specific data - * @reg_table: Some of the legacy registers changed their location - * over time. Their offsets are stored in this table - * - * @init_data: Some features depend on the silicon revision. This - * callback allows runtime modification of the content of - * this struct - * @reset_fe: This callback is used to trigger the reset of the frame - * engine - * @set_mac: This callback is used to set the unicast mac address - * filter - * @fwd_config: This callback is used to setup the forward config - * register of the MAC - * @switch_init: This callback is used to bring up the switch core - * @port_init: Some SoCs have ports that can be router to a switch port - * or an external PHY. This callback is used to setup these - * ports. - * @has_carrier: This callback allows driver to check if there is a cable - * attached. - * @mdio_init: This callbck is used to setup the MDIO bus if one is - * present - * @mdio_cleanup: This callback is used to cleanup the MDIO state. - * @mdio_write: This callback is used to write data to the MDIO bus. - * @mdio_read: This callback is used to write data to the MDIO bus. - * @mdio_adjust_link: This callback is used to apply the PHY settings. - * @piac_offset: the PIAC register has a different different base offset - * @hw_features: feature set depends on the SoC type - * @dma_ring_size: allow GBit SoCs to set bigger rings than FE SoCs - * @napi_weight: allow GBit SoCs to set bigger napi weight than FE SoCs - * @dma_type: SoCs is PDMA, QDMA or a mix of the 2 - * @pdma_glo_cfg: the default DMA configuration - * @rx_int: the TX interrupt bits used by the SoC - * @tx_int: the TX interrupt bits used by the SoC - * @status_int: the Status interrupt bits used by the SoC - * @checksum_bit: the bits used to turn on HW checksumming - * @txd4: default value of the TXD4 descriptor - * @mac_count: the number of MACs that the SoC has - * @new_stats: there is a old and new way to read hardware stats - * registers - * @jumbo_frame: does the SoC support jumbo frames ? - * @rx_2b_offset: tell the rx dma to offset the data by 2 bytes - * @rx_sg_dma: scatter gather support - * @padding_64b enable 64 bit padding - * @padding_bug: rt2880 has a padding bug - * @has_switch: does the SoC have a built-in switch - * - * Although all of the supported SoCs share the same basic functionality, there - * are several SoC specific functions and features that we need to support. This - * struct holds the SoC specific data so that the common core can figure out - * how to setup and use these differences. - */ -struct mtk_soc_data { - const u16 *reg_table; - - void (*init_data)(struct mtk_soc_data *data, struct net_device *netdev); - void (*reset_fe)(struct mtk_eth *eth); - void (*set_mac)(struct mtk_mac *mac, unsigned char *macaddr); - int (*fwd_config)(struct mtk_eth *eth); - int (*switch_init)(struct mtk_eth *eth); - void (*port_init)(struct mtk_eth *eth, struct mtk_mac *mac, - struct device_node *port); - int (*has_carrier)(struct mtk_eth *eth); - int (*mdio_init)(struct mtk_eth *eth); - void (*mdio_cleanup)(struct mtk_eth *eth); - int (*mdio_write)(struct mii_bus *bus, int phy_addr, int phy_reg, - u16 val); - int (*mdio_read)(struct mii_bus *bus, int phy_addr, int phy_reg); - void (*mdio_adjust_link)(struct mtk_eth *eth, int port); - u32 piac_offset; - netdev_features_t hw_features; - u32 dma_ring_size; - u32 napi_weight; - u32 dma_type; - u32 pdma_glo_cfg; - u32 rx_int; - u32 tx_int; - u32 status_int; - u32 checksum_bit; - u32 txd4; - u32 mac_count; - - u32 new_stats:1; - u32 jumbo_frame:1; - u32 rx_2b_offset:1; - u32 rx_sg_dma:1; - u32 padding_64b:1; - u32 padding_bug:1; - u32 has_switch:1; -}; - -#define MTK_STAT_OFFSET 0x40 - -/* struct mtk_hw_stats - the structure that holds the traffic statistics. - * @stats_lock: make sure that stats operations are atomic - * @reg_offset: the status register offset of the SoC - * @syncp: the refcount - * - * All of the supported SoCs have hardware counters for traffic statstics. - * Whenever the status IRQ triggers we can read the latest stats from these - * counters and store them in this struct. - */ -struct mtk_hw_stats { - spinlock_t stats_lock; - u32 reg_offset; - struct u64_stats_sync syncp; - - u64 tx_bytes; - u64 tx_packets; - u64 tx_skip; - u64 tx_collisions; - u64 rx_bytes; - u64 rx_packets; - u64 rx_overflow; - u64 rx_fcs_errors; - u64 rx_short_errors; - u64 rx_long_errors; - u64 rx_checksum_errors; - u64 rx_flow_control_packets; -}; - -/* PDMA descriptor can point at 1-2 segments. This enum allows us to track how - * memory was allocated so that it can be freed properly - */ -enum mtk_tx_flags { - MTK_TX_FLAGS_SINGLE0 = 0x01, - MTK_TX_FLAGS_PAGE0 = 0x02, - MTK_TX_FLAGS_PAGE1 = 0x04, -}; - -/* struct mtk_tx_buf - This struct holds the pointers to the memory pointed at - * by the TX descriptor s - * @skb: The SKB pointer of the packet being sent - * @dma_addr0: The base addr of the first segment - * @dma_len0: The length of the first segment - * @dma_addr1: The base addr of the second segment - * @dma_len1: The length of the second segment - */ -struct mtk_tx_buf { - struct sk_buff *skb; - u32 flags; - DEFINE_DMA_UNMAP_ADDR(dma_addr0); - DEFINE_DMA_UNMAP_LEN(dma_len0); - DEFINE_DMA_UNMAP_ADDR(dma_addr1); - DEFINE_DMA_UNMAP_LEN(dma_len1); -}; - -/* struct mtk_tx_ring - This struct holds info describing a TX ring - * @tx_dma: The descriptor ring - * @tx_buf: The memory pointed at by the ring - * @tx_phys: The physical addr of tx_buf - * @tx_next_free: Pointer to the next free descriptor - * @tx_last_free: Pointer to the last free descriptor - * @tx_thresh: The threshold of minimum amount of free descriptors - * @tx_map: Callback to map a new packet into the ring - * @tx_poll: Callback for the housekeeping function - * @tx_clean: Callback for the cleanup function - * @tx_ring_size: How many descriptors are in the ring - * @tx_free_idx: The index of th next free descriptor - * @tx_next_idx: QDMA uses a linked list. This element points to the next - * free descriptor in the list - * @tx_free_count: QDMA uses a linked list. Track how many free descriptors - * are present - */ -struct mtk_tx_ring { - struct mtk_tx_dma *tx_dma; - struct mtk_tx_buf *tx_buf; - dma_addr_t tx_phys; - struct mtk_tx_dma *tx_next_free; - struct mtk_tx_dma *tx_last_free; - u16 tx_thresh; - int (*tx_map)(struct sk_buff *skb, struct net_device *dev, int tx_num, - struct mtk_tx_ring *ring, bool gso); - int (*tx_poll)(struct mtk_eth *eth, int budget, bool *tx_again); - void (*tx_clean)(struct mtk_eth *eth); - - /* PDMA only */ - u16 tx_ring_size; - u16 tx_free_idx; - - /* QDMA only */ - u16 tx_next_idx; - atomic_t tx_free_count; -}; - -/* struct mtk_rx_ring - This struct holds info describing a RX ring - * @rx_dma: The descriptor ring - * @rx_data: The memory pointed at by the ring - * @trx_phys: The physical addr of rx_buf - * @rx_ring_size: How many descriptors are in the ring - * @rx_buf_size: The size of each packet buffer - * @rx_calc_idx: The current head of ring - */ -struct mtk_rx_ring { - struct mtk_rx_dma *rx_dma; - u8 **rx_data; - dma_addr_t rx_phys; - u16 rx_ring_size; - u16 frag_size; - u16 rx_buf_size; - u16 rx_calc_idx; -}; - -/* currently no SoC has more than 2 macs */ -#define MTK_MAX_DEVS 2 - -/* struct mtk_eth - This is the main datasructure for holding the state - * of the driver - * @dev: The device pointer - * @base: The mapped register i/o base - * @page_lock: Make sure that register operations are atomic - * @soc: pointer to our SoC specific data - * @dummy_dev: we run 2 netdevs on 1 physical DMA ring and need a - * dummy for NAPI to work - * @netdev: The netdev instances - * @mac: Each netdev is linked to a physical MAC - * @switch_np: The phandle for the switch - * @irq: The IRQ that we are using - * @msg_enable: Ethtool msg level - * @ysclk: The sysclk rate - neeed for calibration - * @ethsys: The register map pointing at the range used to setup - * MII modes - * @dma_refcnt: track how many netdevs are using the DMA engine - * @tx_ring: Pointer to the memore holding info about the TX ring - * @rx_ring: Pointer to the memore holding info about the RX ring - * @rx_napi: The NAPI struct - * @scratch_ring: Newer SoCs need memory for a second HW managed TX ring - * @scratch_head: The scratch memory that scratch_ring points to. - * @phy: Info about the attached PHYs - * @mii_bus: If there is a bus we need to create an instance for it - * @link: Track if the ports have a physical link - * @sw_priv: Pointer to the switches private data - * @vlan_map: RX VID tracking - */ - -struct mtk_eth { - struct device *dev; - void __iomem *base; - spinlock_t page_lock; - struct mtk_soc_data *soc; - struct net_device dummy_dev; - struct net_device *netdev[MTK_MAX_DEVS]; - struct mtk_mac *mac[MTK_MAX_DEVS]; - struct device_node *switch_np; - int irq; - u32 msg_enable; - unsigned long sysclk; - struct regmap *ethsys; - atomic_t dma_refcnt; - struct mtk_tx_ring tx_ring; - struct mtk_rx_ring rx_ring[2]; - struct napi_struct rx_napi; - struct mtk_tx_dma *scratch_ring; - void *scratch_head; - struct mtk_phy *phy; - struct mii_bus *mii_bus; - int link[8]; - void *sw_priv; - unsigned long vlan_map; -}; - -/* struct mtk_mac - the structure that holds the info about the MACs of the - * SoC - * @id: The number of the MAC - * @of_node: Our devicetree node - * @hw: Backpointer to our main datastruture - * @hw_stats: Packet statistics counter - * @phy_dev: The attached PHY if available - * @phy_flags: The PHYs flags - * @pending_work: The workqueue used to reset the dma ring - */ -struct mtk_mac { - int id; - struct device_node *of_node; - struct mtk_eth *hw; - struct mtk_hw_stats *hw_stats; - struct phy_device *phy_dev; - u32 phy_flags; - struct work_struct pending_work; -}; - -/* the struct describing the SoC. these are declared in the soc_xyz.c files */ -extern const struct of_device_id of_mtk_match[]; - -/* read the hardware status register */ -void mtk_stats_update_mac(struct mtk_mac *mac); - -/* default checksum setup handler */ -void mtk_reset(struct mtk_eth *eth, u32 reset_bits); - -/* register i/o wrappers */ -void mtk_w32(struct mtk_eth *eth, u32 val, unsigned int reg); -u32 mtk_r32(struct mtk_eth *eth, unsigned int reg); - -/* default clock calibration handler */ -int mtk_set_clock_cycle(struct mtk_eth *eth); - -/* default checksum setup handler */ -void mtk_csum_config(struct mtk_eth *eth); - -/* default forward config handler */ -void mtk_fwd_config(struct mtk_eth *eth); - -#endif /* MTK_ETH_H */ diff --git a/drivers/staging/mt7621-eth/soc_mt7621.c b/drivers/staging/mt7621-eth/soc_mt7621.c deleted file mode 100644 index 5d63b5d96f6b..000000000000 --- a/drivers/staging/mt7621-eth/soc_mt7621.c +++ /dev/null @@ -1,161 +0,0 @@ -/* This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Copyright (C) 2009-2016 John Crispin - * Copyright (C) 2009-2016 Felix Fietkau - * Copyright (C) 2013-2016 Michael Lee - */ - -#include -#include -#include -#include - -#include - -#include "mtk_eth_soc.h" -#include "gsw_mt7620.h" -#include "mdio.h" - -#define MT7620_CDMA_CSG_CFG 0x400 -#define MT7621_CDMP_IG_CTRL (MT7620_CDMA_CSG_CFG + 0x00) -#define MT7621_CDMP_EG_CTRL (MT7620_CDMA_CSG_CFG + 0x04) -#define MT7621_RESET_FE BIT(6) -#define MT7621_L4_VALID BIT(24) - -#define MT7621_TX_DMA_UDF BIT(19) - -#define CDMA_ICS_EN BIT(2) -#define CDMA_UCS_EN BIT(1) -#define CDMA_TCS_EN BIT(0) - -#define GDMA_ICS_EN BIT(22) -#define GDMA_TCS_EN BIT(21) -#define GDMA_UCS_EN BIT(20) - -/* frame engine counters */ -#define MT7621_REG_MIB_OFFSET 0x2000 -#define MT7621_PPE_AC_BCNT0 (MT7621_REG_MIB_OFFSET + 0x00) -#define MT7621_GDM1_TX_GBCNT (MT7621_REG_MIB_OFFSET + 0x400) -#define MT7621_GDM2_TX_GBCNT (MT7621_GDM1_TX_GBCNT + 0x40) - -#define GSW_REG_GDMA1_MAC_ADRL 0x508 -#define GSW_REG_GDMA1_MAC_ADRH 0x50C -#define GSW_REG_GDMA2_MAC_ADRL 0x1508 -#define GSW_REG_GDMA2_MAC_ADRH 0x150C - -#define MT7621_MTK_RST_GL 0x04 -#define MT7620_MTK_INT_STATUS2 0x08 - -/* MTK_INT_STATUS reg on mt7620 define CNT_GDM1_AF at BIT(29) - * but after test it should be BIT(13). - */ -#define MT7621_MTK_GDM1_AF BIT(28) -#define MT7621_MTK_GDM2_AF BIT(29) - -static const u16 mt7621_reg_table[MTK_REG_COUNT] = { - [MTK_REG_PDMA_GLO_CFG] = RT5350_PDMA_GLO_CFG, - [MTK_REG_PDMA_RST_CFG] = RT5350_PDMA_RST_CFG, - [MTK_REG_DLY_INT_CFG] = RT5350_DLY_INT_CFG, - [MTK_REG_TX_BASE_PTR0] = RT5350_TX_BASE_PTR0, - [MTK_REG_TX_MAX_CNT0] = RT5350_TX_MAX_CNT0, - [MTK_REG_TX_CTX_IDX0] = RT5350_TX_CTX_IDX0, - [MTK_REG_TX_DTX_IDX0] = RT5350_TX_DTX_IDX0, - [MTK_REG_RX_BASE_PTR0] = RT5350_RX_BASE_PTR0, - [MTK_REG_RX_MAX_CNT0] = RT5350_RX_MAX_CNT0, - [MTK_REG_RX_CALC_IDX0] = RT5350_RX_CALC_IDX0, - [MTK_REG_RX_DRX_IDX0] = RT5350_RX_DRX_IDX0, - [MTK_REG_MTK_INT_ENABLE] = RT5350_MTK_INT_ENABLE, - [MTK_REG_MTK_INT_STATUS] = RT5350_MTK_INT_STATUS, - [MTK_REG_MTK_DMA_VID_BASE] = 0, - [MTK_REG_MTK_COUNTER_BASE] = MT7621_GDM1_TX_GBCNT, - [MTK_REG_MTK_RST_GL] = MT7621_MTK_RST_GL, - [MTK_REG_MTK_INT_STATUS2] = MT7620_MTK_INT_STATUS2, -}; - -static void mt7621_mtk_reset(struct mtk_eth *eth) -{ - mtk_reset(eth, MT7621_RESET_FE); -} - -static int mt7621_fwd_config(struct mtk_eth *eth) -{ - /* Setup GMAC1 only, there is no support for GMAC2 yet */ - mtk_w32(eth, mtk_r32(eth, MT7620_GDMA1_FWD_CFG) & ~0xffff, - MT7620_GDMA1_FWD_CFG); - - /* Enable RX checksum */ - mtk_w32(eth, mtk_r32(eth, MT7620_GDMA1_FWD_CFG) | (GDMA_ICS_EN | - GDMA_TCS_EN | GDMA_UCS_EN), - MT7620_GDMA1_FWD_CFG); - - /* Enable RX VLan Offloading */ - mtk_w32(eth, 0, MT7621_CDMP_EG_CTRL); - - return 0; -} - -static void mt7621_set_mac(struct mtk_mac *mac, unsigned char *hwaddr) -{ - unsigned long flags; - - spin_lock_irqsave(&mac->hw->page_lock, flags); - if (mac->id == 0) { - mtk_w32(mac->hw, (hwaddr[0] << 8) | hwaddr[1], - GSW_REG_GDMA1_MAC_ADRH); - mtk_w32(mac->hw, (hwaddr[2] << 24) | (hwaddr[3] << 16) | - (hwaddr[4] << 8) | hwaddr[5], - GSW_REG_GDMA1_MAC_ADRL); - } - if (mac->id == 1) { - mtk_w32(mac->hw, (hwaddr[0] << 8) | hwaddr[1], - GSW_REG_GDMA2_MAC_ADRH); - mtk_w32(mac->hw, (hwaddr[2] << 24) | (hwaddr[3] << 16) | - (hwaddr[4] << 8) | hwaddr[5], - GSW_REG_GDMA2_MAC_ADRL); - } - spin_unlock_irqrestore(&mac->hw->page_lock, flags); -} - -static struct mtk_soc_data mt7621_data = { - .hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM | - NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | - NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | - NETIF_F_IPV6_CSUM, - .dma_type = MTK_PDMA, - .dma_ring_size = 256, - .napi_weight = 64, - .new_stats = 1, - .padding_64b = 1, - .rx_2b_offset = 1, - .rx_sg_dma = 1, - .has_switch = 1, - .mac_count = 2, - .reset_fe = mt7621_mtk_reset, - .set_mac = mt7621_set_mac, - .fwd_config = mt7621_fwd_config, - .switch_init = mtk_gsw_init, - .reg_table = mt7621_reg_table, - .pdma_glo_cfg = MTK_PDMA_SIZE_16DWORDS, - .rx_int = RT5350_RX_DONE_INT, - .tx_int = RT5350_TX_DONE_INT, - .status_int = MT7621_MTK_GDM1_AF | MT7621_MTK_GDM2_AF, - .checksum_bit = MT7621_L4_VALID, - .has_carrier = mt7620_has_carrier, - .mdio_read = mt7620_mdio_read, - .mdio_write = mt7620_mdio_write, - .mdio_adjust_link = mt7620_mdio_link_adjust, -}; - -const struct of_device_id of_mtk_match[] = { - { .compatible = "mediatek,mt7621-eth", .data = &mt7621_data }, - {}, -}; - -MODULE_DEVICE_TABLE(of, of_mtk_match); -- cgit From 4420a5611ea5d42c16628d01784dda7a8260d738 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 12 Mar 2019 10:09:37 +1100 Subject: staging: mt7621-dts: update ethernet settings. The ethernet in mt7621 is now supported by drivers/net/ethernet/mediatek/ which provides support for the integrated switch through DSA. This requires some devicetree changes, and particularly allows a board dts to identify which switch ports are present. The second CPU interface - gmac1 - doesn't work yet, so the device tree information may not be correct. The phy (which is present on the gnubee-pc2) can negotiate and report connection speed etc, but no traffic flows. The gnubee-pc1 has two network ports which are 'black' and 'blue'. There are connected to switch ports 0 and 4 respectively. Signed-off-by: NeilBrown Signed-off-by: Greg Kroah-Hartman --- drivers/staging/mt7621-dts/gbpc1.dts | 29 ++++++-------- drivers/staging/mt7621-dts/mt7621.dtsi | 73 ++++++++++++++++++++++++++++++++-- 2 files changed, 83 insertions(+), 19 deletions(-) diff --git a/drivers/staging/mt7621-dts/gbpc1.dts b/drivers/staging/mt7621-dts/gbpc1.dts index b73385540216..250c15ace2a7 100644 --- a/drivers/staging/mt7621-dts/gbpc1.dts +++ b/drivers/staging/mt7621-dts/gbpc1.dts @@ -117,22 +117,6 @@ status = "okay"; }; -ðernet { - //mtd-mac-address = <&factory 0xe000>; - gmac1: mac@0 { - compatible = "mediatek,eth-mac"; - reg = <0>; - phy-handle = <&phy1>; - }; - - mdio-bus { - phy1: ethernet-phy@1 { - reg = <1>; - phy-mode = "rgmii"; - }; - }; -}; - &pinctrl { state_default: pinctrl0 { gpio { @@ -141,3 +125,16 @@ }; }; }; + +&switch0 { + ports { + port@0 { + label = "ethblack"; + status = "ok"; + }; + port@4 { + label = "ethblue"; + status = "ok"; + }; + }; +}; diff --git a/drivers/staging/mt7621-dts/mt7621.dtsi b/drivers/staging/mt7621-dts/mt7621.dtsi index 6aff3680ce4b..17020e24abd2 100644 --- a/drivers/staging/mt7621-dts/mt7621.dtsi +++ b/drivers/staging/mt7621-dts/mt7621.dtsi @@ -372,16 +372,83 @@ mediatek,ethsys = <ðsys>; - mediatek,switch = <&gsw>; + gmac0: mac@0 { + compatible = "mediatek,eth-mac"; + reg = <0>; + phy-mode = "rgmii"; + fixed-link { + speed = <1000>; + full-duplex; + pause; + }; + }; + gmac1: mac@1 { + compatible = "mediatek,eth-mac"; + reg = <1>; + status = "off"; + phy-mode = "rgmii"; + phy-handle = <&phy5>; + }; mdio-bus { #address-cells = <1>; #size-cells = <0>; - phy1f: ethernet-phy@1f { - reg = <0x1f>; + phy5: ethernet-phy@5 { + reg = <5>; phy-mode = "rgmii"; }; + + switch0: switch0@0 { + compatible = "mediatek,mt7621"; + #address-cells = <1>; + #size-cells = <0>; + reg = <0>; + mediatek,mcm; + resets = <&rstctrl 2>; + reset-names = "mcm"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + reg = <0>; + port@0 { + status = "off"; + reg = <0>; + label = "lan0"; + }; + port@1 { + status = "off"; + reg = <1>; + label = "lan1"; + }; + port@2 { + status = "off"; + reg = <2>; + label = "lan2"; + }; + port@3 { + status = "off"; + reg = <3>; + label = "lan3"; + }; + port@4 { + status = "off"; + reg = <4>; + label = "lan4"; + }; + port@6 { + reg = <6>; + label = "cpu"; + ethernet = <&gmac0>; + phy-mode = "trgmii"; + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + }; + }; }; }; -- cgit From 8bce6dcede65139a087ff240127e3f3c01363eed Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 11 Mar 2019 23:10:10 +0800 Subject: staging: erofs: fix to handle error path of erofs_vmap() erofs_vmap() wrapped vmap() and vm_map_ram() to return virtual continuous memory, but both of them can failed due to a lot of reason, previously, erofs_vmap()'s callers didn't handle them, which can potentially cause NULL pointer access, fix it. Fixes: 3883a79abd02 ("staging: erofs: introduce VLE decompression support") Fixes: 0d40d6e399c1 ("staging: erofs: add a generic z_erofs VLE decompressor") Cc: # 4.19+ Signed-off-by: Gao Xiang Signed-off-by: Chao Yu Signed-off-by: Greg Kroah-Hartman --- drivers/staging/erofs/unzip_vle.c | 4 ++++ drivers/staging/erofs/unzip_vle_lz4.c | 7 +++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c index 8715bc50e09c..c7b3b21123c1 100644 --- a/drivers/staging/erofs/unzip_vle.c +++ b/drivers/staging/erofs/unzip_vle.c @@ -1029,6 +1029,10 @@ repeat: skip_allocpage: vout = erofs_vmap(pages, nr_pages); + if (!vout) { + err = -ENOMEM; + goto out; + } err = z_erofs_vle_unzip_vmap(compressed_pages, clusterpages, vout, llen, work->pageofs, overlapped); diff --git a/drivers/staging/erofs/unzip_vle_lz4.c b/drivers/staging/erofs/unzip_vle_lz4.c index 48b263a2731a..0daac9b984a8 100644 --- a/drivers/staging/erofs/unzip_vle_lz4.c +++ b/drivers/staging/erofs/unzip_vle_lz4.c @@ -136,10 +136,13 @@ int z_erofs_vle_unzip_fast_percpu(struct page **compressed_pages, nr_pages = DIV_ROUND_UP(outlen + pageofs, PAGE_SIZE); - if (clusterpages == 1) + if (clusterpages == 1) { vin = kmap_atomic(compressed_pages[0]); - else + } else { vin = erofs_vmap(compressed_pages, clusterpages); + if (!vin) + return -ENOMEM; + } preempt_disable(); vout = erofs_pcpubuf[smp_processor_id()].data; -- cgit From bafd9c64056cd034a1174dcadb65cd3b294ff8f6 Mon Sep 17 00:00:00 2001 From: Ian Abbott Date: Mon, 4 Mar 2019 14:33:54 +0000 Subject: staging: comedi: ni_mio_common: Fix divide-by-zero for DIO cmdtest `ni_cdio_cmdtest()` validates Comedi asynchronous commands for the DIO subdevice (subdevice 2) of supported National Instruments M-series cards. It is called when handling the `COMEDI_CMD` and `COMEDI_CMDTEST` ioctls for this subdevice. There are two causes for a possible divide-by-zero error when validating that the `stop_arg` member of the passed-in command is not too large. The first cause for the divide-by-zero is that calls to `comedi_bytes_per_scan()` are only valid once the command has been copied to `s->async->cmd`, but that copy is only done for the `COMEDI_CMD` ioctl. For the `COMEDI_CMDTEST` ioctl, it will use whatever was left there by the previous `COMEDI_CMD` ioctl, if any. (This is very likely, as it is usual for the application to use `COMEDI_CMDTEST` before `COMEDI_CMD`.) If there has been no previous, valid `COMEDI_CMD` for this subdevice, then `comedi_bytes_per_scan()` will return 0, so the subsequent division in `ni_cdio_cmdtest()` of `s->async->prealloc_bufsz / comedi_bytes_per_scan(s)` will be a divide-by-zero error. To fix this error, call a new function `comedi_bytes_per_scan_cmd(s, cmd)`, based on the existing `comedi_bytes_per_scan(s)` but using a specified `struct comedi_cmd` for its calculations. (Also refactor `comedi_bytes_per_scan()` to call the new function.) Once the first cause for the divide-by-zero has been fixed, the second cause is that `comedi_bytes_per_scan_cmd()` can legitimately return 0 if the `scan_end_arg` member of the `struct comedi_cmd` being tested is 0. Fix it by only performing the division (and validating that `stop_arg` is no more than the maximum value) if `comedi_bytes_per_scan_cmd()` returns a non-zero value. The problem was reported on the COMEDI mailing list here: https://groups.google.com/forum/#!topic/comedi_list/4t9WlHzMhKM Reported-by: Ivan Vasilyev Tested-by: Ivan Vasilyev Fixes: f164cbf98fa8 ("staging: comedi: ni_mio_common: add finite regeneration to dio output") Cc: # 4.6+ Cc: Spencer E. Olson Signed-off-by: Ian Abbott Signed-off-by: Greg Kroah-Hartman --- drivers/staging/comedi/comedidev.h | 2 ++ drivers/staging/comedi/drivers.c | 33 ++++++++++++++++++++++---- drivers/staging/comedi/drivers/ni_mio_common.c | 10 +++++--- 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/drivers/staging/comedi/comedidev.h b/drivers/staging/comedi/comedidev.h index a7d569cfca5d..0dff1ac057cd 100644 --- a/drivers/staging/comedi/comedidev.h +++ b/drivers/staging/comedi/comedidev.h @@ -1001,6 +1001,8 @@ int comedi_dio_insn_config(struct comedi_device *dev, unsigned int mask); unsigned int comedi_dio_update_state(struct comedi_subdevice *s, unsigned int *data); +unsigned int comedi_bytes_per_scan_cmd(struct comedi_subdevice *s, + struct comedi_cmd *cmd); unsigned int comedi_bytes_per_scan(struct comedi_subdevice *s); unsigned int comedi_nscans_left(struct comedi_subdevice *s, unsigned int nscans); diff --git a/drivers/staging/comedi/drivers.c b/drivers/staging/comedi/drivers.c index eefa62f42c0f..5a32b8fc000e 100644 --- a/drivers/staging/comedi/drivers.c +++ b/drivers/staging/comedi/drivers.c @@ -394,11 +394,13 @@ unsigned int comedi_dio_update_state(struct comedi_subdevice *s, EXPORT_SYMBOL_GPL(comedi_dio_update_state); /** - * comedi_bytes_per_scan() - Get length of asynchronous command "scan" in bytes + * comedi_bytes_per_scan_cmd() - Get length of asynchronous command "scan" in + * bytes * @s: COMEDI subdevice. + * @cmd: COMEDI command. * * Determines the overall scan length according to the subdevice type and the - * number of channels in the scan. + * number of channels in the scan for the specified command. * * For digital input, output or input/output subdevices, samples for * multiple channels are assumed to be packed into one or more unsigned @@ -408,9 +410,9 @@ EXPORT_SYMBOL_GPL(comedi_dio_update_state); * * Returns the overall scan length in bytes. */ -unsigned int comedi_bytes_per_scan(struct comedi_subdevice *s) +unsigned int comedi_bytes_per_scan_cmd(struct comedi_subdevice *s, + struct comedi_cmd *cmd) { - struct comedi_cmd *cmd = &s->async->cmd; unsigned int num_samples; unsigned int bits_per_sample; @@ -427,6 +429,29 @@ unsigned int comedi_bytes_per_scan(struct comedi_subdevice *s) } return comedi_samples_to_bytes(s, num_samples); } +EXPORT_SYMBOL_GPL(comedi_bytes_per_scan_cmd); + +/** + * comedi_bytes_per_scan() - Get length of asynchronous command "scan" in bytes + * @s: COMEDI subdevice. + * + * Determines the overall scan length according to the subdevice type and the + * number of channels in the scan for the current command. + * + * For digital input, output or input/output subdevices, samples for + * multiple channels are assumed to be packed into one or more unsigned + * short or unsigned int values according to the subdevice's %SDF_LSAMPL + * flag. For other types of subdevice, samples are assumed to occupy a + * whole unsigned short or unsigned int according to the %SDF_LSAMPL flag. + * + * Returns the overall scan length in bytes. + */ +unsigned int comedi_bytes_per_scan(struct comedi_subdevice *s) +{ + struct comedi_cmd *cmd = &s->async->cmd; + + return comedi_bytes_per_scan_cmd(s, cmd); +} EXPORT_SYMBOL_GPL(comedi_bytes_per_scan); static unsigned int __comedi_nscans_left(struct comedi_subdevice *s, diff --git a/drivers/staging/comedi/drivers/ni_mio_common.c b/drivers/staging/comedi/drivers/ni_mio_common.c index 5edf59ac6706..b04dad8c7092 100644 --- a/drivers/staging/comedi/drivers/ni_mio_common.c +++ b/drivers/staging/comedi/drivers/ni_mio_common.c @@ -3545,6 +3545,7 @@ static int ni_cdio_cmdtest(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_cmd *cmd) { struct ni_private *devpriv = dev->private; + unsigned int bytes_per_scan; int err = 0; /* Step 1 : check if triggers are trivially valid */ @@ -3579,9 +3580,12 @@ static int ni_cdio_cmdtest(struct comedi_device *dev, err |= comedi_check_trigger_arg_is(&cmd->convert_arg, 0); err |= comedi_check_trigger_arg_is(&cmd->scan_end_arg, cmd->chanlist_len); - err |= comedi_check_trigger_arg_max(&cmd->stop_arg, - s->async->prealloc_bufsz / - comedi_bytes_per_scan(s)); + bytes_per_scan = comedi_bytes_per_scan_cmd(s, cmd); + if (bytes_per_scan) { + err |= comedi_check_trigger_arg_max(&cmd->stop_arg, + s->async->prealloc_bufsz / + bytes_per_scan); + } if (err) return 3; -- cgit From ae0a6d2017f733781dcc938a471ccc2d05f9bee6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Mar 2019 20:42:33 +0100 Subject: staging: olpc_dcon_xo_1: add missing 'const' qualifier gcc noticed a mismatch between the type qualifiers after a recent cleanup: drivers/staging/olpc_dcon/olpc_dcon_xo_1.c: In function 'dcon_init_xo_1': drivers/staging/olpc_dcon/olpc_dcon_xo_1.c:48:26: error: initialization discards 'const' qualifier from pointer target type [-Werror=discarded-qualifiers] Add the 'const' keyword that should have been there all along. Fixes: 2159fb372929 ("staging: olpc_dcon: olpc_dcon_xo_1.c: Switch to the gpio descriptor interface") Signed-off-by: Arnd Bergmann Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/olpc_dcon/olpc_dcon_xo_1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/staging/olpc_dcon/olpc_dcon_xo_1.c b/drivers/staging/olpc_dcon/olpc_dcon_xo_1.c index 80b8d4153414..a54286498a47 100644 --- a/drivers/staging/olpc_dcon/olpc_dcon_xo_1.c +++ b/drivers/staging/olpc_dcon/olpc_dcon_xo_1.c @@ -45,7 +45,7 @@ static int dcon_init_xo_1(struct dcon_priv *dcon) { unsigned char lob; int ret, i; - struct dcon_gpio *pin = &gpios_asis[0]; + const struct dcon_gpio *pin = &gpios_asis[0]; for (i = 0; i < ARRAY_SIZE(gpios_asis); i++) { gpios[i] = devm_gpiod_get(&dcon->client->dev, pin[i].name, -- cgit From 1beea6204e2304dd11600791d8dad8e7350af6ad Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 4 Mar 2019 20:43:00 +0100 Subject: staging: axis-fifo: add CONFIG_OF dependency When building without CONFIG_OF, the compiler loses track of the flow control in axis_fifo_probe(), and thinks that many variables are used without an initialization even though we actually leave the function before the first use: drivers/staging/axis-fifo/axis-fifo.c: In function 'axis_fifo_probe': drivers/staging/axis-fifo/axis-fifo.c:900:5: error: 'rxd_tdata_width' may be used uninitialized in this function [-Werror=maybe-uninitialized] if (rxd_tdata_width != 32) { ^ drivers/staging/axis-fifo/axis-fifo.c:907:5: error: 'txd_tdata_width' may be used uninitialized in this function [-Werror=maybe-uninitialized] if (txd_tdata_width != 32) { ^ drivers/staging/axis-fifo/axis-fifo.c:914:5: error: 'has_tdest' may be used uninitialized in this function [-Werror=maybe-uninitialized] if (has_tdest) { ^ drivers/staging/axis-fifo/axis-fifo.c:919:5: error: 'has_tid' may be used uninitialized in this function [-Werror=maybe-uninitialized] When CONFIG_OF is set, this does not happen, and since the driver cannot work without it, just add that option as a Kconfig dependency. Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- drivers/staging/axis-fifo/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/axis-fifo/Kconfig b/drivers/staging/axis-fifo/Kconfig index 687537203d9c..d9725888af6f 100644 --- a/drivers/staging/axis-fifo/Kconfig +++ b/drivers/staging/axis-fifo/Kconfig @@ -3,6 +3,7 @@ # config XIL_AXIS_FIFO tristate "Xilinx AXI-Stream FIFO IP core driver" + depends on OF default n help This adds support for the Xilinx AXI-Stream -- cgit From 45ac7b31bc6c4af885cc5b5d6c534c15bcbe7643 Mon Sep 17 00:00:00 2001 From: Samuel Thibault Date: Thu, 7 Mar 2019 23:06:57 +0100 Subject: staging: speakup_soft: Fix alternate speech with other synths When switching from speakup_soft to another synth, speakup_soft would keep calling synth_buffer_getc() from softsynthx_read. Let's thus make synth.c export the knowledge of the current synth, so that speakup_soft can determine whether it should be running. speakup_soft also needs to set itself alive, otherwise the switch would let it remain silent. Signed-off-by: Samuel Thibault Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/staging/speakup/speakup_soft.c | 16 +++++++++++----- drivers/staging/speakup/spk_priv.h | 1 + drivers/staging/speakup/synth.c | 6 ++++++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/staging/speakup/speakup_soft.c b/drivers/staging/speakup/speakup_soft.c index edff6ce85655..9d85a3a1af4c 100644 --- a/drivers/staging/speakup/speakup_soft.c +++ b/drivers/staging/speakup/speakup_soft.c @@ -210,12 +210,15 @@ static ssize_t softsynthx_read(struct file *fp, char __user *buf, size_t count, return -EINVAL; spin_lock_irqsave(&speakup_info.spinlock, flags); + synth_soft.alive = 1; while (1) { prepare_to_wait(&speakup_event, &wait, TASK_INTERRUPTIBLE); - if (!unicode) - synth_buffer_skip_nonlatin1(); - if (!synth_buffer_empty() || speakup_info.flushing) - break; + if (synth_current() == &synth_soft) { + if (!unicode) + synth_buffer_skip_nonlatin1(); + if (!synth_buffer_empty() || speakup_info.flushing) + break; + } spin_unlock_irqrestore(&speakup_info.spinlock, flags); if (fp->f_flags & O_NONBLOCK) { finish_wait(&speakup_event, &wait); @@ -235,6 +238,8 @@ static ssize_t softsynthx_read(struct file *fp, char __user *buf, size_t count, /* Keep 3 bytes available for a 16bit UTF-8-encoded character */ while (chars_sent <= count - bytes_per_ch) { + if (synth_current() != &synth_soft) + break; if (speakup_info.flushing) { speakup_info.flushing = 0; ch = '\x18'; @@ -331,7 +336,8 @@ static __poll_t softsynth_poll(struct file *fp, struct poll_table_struct *wait) poll_wait(fp, &speakup_event, wait); spin_lock_irqsave(&speakup_info.spinlock, flags); - if (!synth_buffer_empty() || speakup_info.flushing) + if (synth_current() == &synth_soft && + (!synth_buffer_empty() || speakup_info.flushing)) ret = EPOLLIN | EPOLLRDNORM; spin_unlock_irqrestore(&speakup_info.spinlock, flags); return ret; diff --git a/drivers/staging/speakup/spk_priv.h b/drivers/staging/speakup/spk_priv.h index c8e688878fc7..ac6a74883af4 100644 --- a/drivers/staging/speakup/spk_priv.h +++ b/drivers/staging/speakup/spk_priv.h @@ -74,6 +74,7 @@ int synth_request_region(unsigned long start, unsigned long n); int synth_release_region(unsigned long start, unsigned long n); int synth_add(struct spk_synth *in_synth); void synth_remove(struct spk_synth *in_synth); +struct spk_synth *synth_current(void); extern struct speakup_info_t speakup_info; diff --git a/drivers/staging/speakup/synth.c b/drivers/staging/speakup/synth.c index 25f259ee4ffc..3568bfb89912 100644 --- a/drivers/staging/speakup/synth.c +++ b/drivers/staging/speakup/synth.c @@ -481,4 +481,10 @@ void synth_remove(struct spk_synth *in_synth) } EXPORT_SYMBOL_GPL(synth_remove); +struct spk_synth *synth_current(void) +{ + return synth; +} +EXPORT_SYMBOL_GPL(synth_current); + short spk_punc_masks[] = { 0, SOME, MOST, PUNC, PUNC | B_SYM }; -- cgit From 90cd9bed5adb3e3bd4d3ac4cbcecbc4a8028bbaf Mon Sep 17 00:00:00 2001 From: Maxim Zhukov Date: Sat, 9 Mar 2019 12:54:00 +0300 Subject: staging, mt7621-pci: fix build without pci support Add depends on PCI for PCI_MT7621 Signed-off-by: Maxim Zhukov Signed-off-by: Greg Kroah-Hartman --- drivers/staging/mt7621-pci/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/staging/mt7621-pci/Kconfig b/drivers/staging/mt7621-pci/Kconfig index d33533872a16..c8fa17cfa807 100644 --- a/drivers/staging/mt7621-pci/Kconfig +++ b/drivers/staging/mt7621-pci/Kconfig @@ -1,6 +1,7 @@ config PCI_MT7621 tristate "MediaTek MT7621 PCI Controller" depends on RALINK + depends on PCI select PCI_DRIVERS_GENERIC help This selects a driver for the MediaTek MT7621 PCI Controller. -- cgit From 21d2b122732318b48c10b7262e15595ce54511d3 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 26 Feb 2019 13:44:51 -0800 Subject: drm/vgem: fix use-after-free when drm_gem_handle_create() fails If drm_gem_handle_create() fails in vgem_gem_create(), then the drm_vgem_gem_object is freed twice: once when the reference is dropped by drm_gem_object_put_unlocked(), and again by __vgem_gem_destroy(). This was hit by syzkaller using fault injection. Fix it by skipping the second free. Reported-by: syzbot+e73f2fb5ed5a5df36d33@syzkaller.appspotmail.com Fixes: af33a9190d02 ("drm/vgem: Enable dmabuf import interfaces") Reviewed-by: Chris Wilson Cc: Laura Abbott Cc: Daniel Vetter Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Acked-by: Laura Abbott Signed-off-by: Rodrigo Siqueira Link: https://patchwork.freedesktop.org/patch/msgid/20190226214451.195123-1-ebiggers@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/vgem/vgem_drv.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index 5930facd6d2d..11a8f99ba18c 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -191,13 +191,9 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev, ret = drm_gem_handle_create(file, &obj->base, handle); drm_gem_object_put_unlocked(&obj->base); if (ret) - goto err; + return ERR_PTR(ret); return &obj->base; - -err: - __vgem_gem_destroy(obj); - return ERR_PTR(ret); } static int vgem_gem_dumb_create(struct drm_file *file, struct drm_device *dev, -- cgit From 36b6c9ed45afe89045973e8dee1b004dd5372d40 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 26 Feb 2019 14:08:58 -0800 Subject: drm/vkms: fix use-after-free when drm_gem_handle_create() fails If drm_gem_handle_create() fails in vkms_gem_create(), then the vkms_gem_object is freed twice: once when the reference is dropped by drm_gem_object_put_unlocked(), and again by the extra calls to drm_gem_object_release() and kfree(). Fix it by skipping the second release and free. This bug was originally found in the vgem driver by syzkaller using fault injection, but I noticed it's also present in the vkms driver. Fixes: 559e50fd34d1 ("drm/vkms: Add dumb operations") Cc: Rodrigo Siqueira Cc: Haneen Mohammed Cc: Daniel Vetter Cc: Chris Wilson Cc: stable@vger.kernel.org Signed-off-by: Eric Biggers Reviewed-by: Chris Wilson Reviewed-by: Rodrigo Siqueira Signed-off-by: Rodrigo Siqueira Link: https://patchwork.freedesktop.org/patch/msgid/20190226220858.214438-1-ebiggers@kernel.org Signed-off-by: Maxime Ripard --- drivers/gpu/drm/vkms/vkms_gem.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/gpu/drm/vkms/vkms_gem.c b/drivers/gpu/drm/vkms/vkms_gem.c index 138b0bb325cf..69048e73377d 100644 --- a/drivers/gpu/drm/vkms/vkms_gem.c +++ b/drivers/gpu/drm/vkms/vkms_gem.c @@ -111,11 +111,8 @@ struct drm_gem_object *vkms_gem_create(struct drm_device *dev, ret = drm_gem_handle_create(file, &obj->gem, handle); drm_gem_object_put_unlocked(&obj->gem); - if (ret) { - drm_gem_object_release(&obj->gem); - kfree(obj); + if (ret) return ERR_PTR(ret); - } return &obj->gem; } -- cgit From b5b4453e7912f056da1ca7572574cada32ecb60c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 14 Mar 2019 00:14:38 +1100 Subject: powerpc/vdso64: Fix CLOCK_MONOTONIC inconsistencies across Y2038 Jakub Drnec reported: Setting the realtime clock can sometimes make the monotonic clock go back by over a hundred years. Decreasing the realtime clock across the y2k38 threshold is one reliable way to reproduce. Allegedly this can also happen just by running ntpd, I have not managed to reproduce that other than booting with rtc at >2038 and then running ntp. When this happens, anything with timers (e.g. openjdk) breaks rather badly. And included a test case (slightly edited for brevity): #define _POSIX_C_SOURCE 199309L #include #include #include #include long get_time(void) { struct timespec tp; clock_gettime(CLOCK_MONOTONIC, &tp); return tp.tv_sec + tp.tv_nsec / 1000000000; } int main(void) { long last = get_time(); while(1) { long now = get_time(); if (now < last) { printf("clock went backwards by %ld seconds!\n", last - now); } last = now; sleep(1); } return 0; } Which when run concurrently with: # date -s 2040-1-1 # date -s 2037-1-1 Will detect the clock going backward. The root cause is that wtom_clock_sec in struct vdso_data is only a 32-bit signed value, even though we set its value to be equal to tk->wall_to_monotonic.tv_sec which is 64-bits. Because the monotonic clock starts at zero when the system boots the wall_to_montonic.tv_sec offset is negative for current and future dates. Currently on a freshly booted system the offset will be in the vicinity of negative 1.5 billion seconds. However if the wall clock is set past the Y2038 boundary, the offset from wall to monotonic becomes less than negative 2^31, and no longer fits in 32-bits. When that value is assigned to wtom_clock_sec it is truncated and becomes positive, causing the VDSO assembly code to calculate CLOCK_MONOTONIC incorrectly. That causes CLOCK_MONOTONIC to jump ahead by ~4 billion seconds which it is not meant to do. Worse, if the time is then set back before the Y2038 boundary CLOCK_MONOTONIC will jump backward. We can fix it simply by storing the full 64-bit offset in the vdso_data, and using that in the VDSO assembly code. We also shuffle some of the fields in vdso_data to avoid creating a hole. The original commit that added the CLOCK_MONOTONIC support to the VDSO did actually use a 64-bit value for wtom_clock_sec, see commit a7f290dad32e ("[PATCH] powerpc: Merge vdso's and add vdso support to 32 bits kernel") (Nov 2005). However just 3 days later it was converted to 32-bits in commit 0c37ec2aa88b ("[PATCH] powerpc: vdso fixes (take #2)"), and the bug has existed since then AFAICS. Fixes: 0c37ec2aa88b ("[PATCH] powerpc: vdso fixes (take #2)") Cc: stable@vger.kernel.org # v2.6.15+ Link: http://lkml.kernel.org/r/HaC.ZfES.62bwlnvAvMP.1STMMj@seznam.cz Reported-by: Jakub Drnec Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/vdso_datapage.h | 8 ++++---- arch/powerpc/kernel/vdso64/gettimeofday.S | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h index 1afe90ade595..bbc06bd72b1f 100644 --- a/arch/powerpc/include/asm/vdso_datapage.h +++ b/arch/powerpc/include/asm/vdso_datapage.h @@ -82,10 +82,10 @@ struct vdso_data { __u32 icache_block_size; /* L1 i-cache block size */ __u32 dcache_log_block_size; /* L1 d-cache log block size */ __u32 icache_log_block_size; /* L1 i-cache log block size */ - __s32 wtom_clock_sec; /* Wall to monotonic clock */ - __s32 wtom_clock_nsec; - struct timespec stamp_xtime; /* xtime as at tb_orig_stamp */ - __u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */ + __u32 stamp_sec_fraction; /* fractional seconds of stamp_xtime */ + __s32 wtom_clock_nsec; /* Wall to monotonic clock nsec */ + __s64 wtom_clock_sec; /* Wall to monotonic clock sec */ + struct timespec stamp_xtime; /* xtime as at tb_orig_stamp */ __u32 syscall_map_64[SYSCALL_MAP_SIZE]; /* map of syscalls */ __u32 syscall_map_32[SYSCALL_MAP_SIZE]; /* map of syscalls */ }; diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index a4ed9edfd5f0..1f324c28705b 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -92,7 +92,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) * At this point, r4,r5 contain our sec/nsec values. */ - lwa r6,WTOM_CLOCK_SEC(r3) + ld r6,WTOM_CLOCK_SEC(r3) lwa r9,WTOM_CLOCK_NSEC(r3) /* We now have our result in r6,r9. We create a fake dependency @@ -125,7 +125,7 @@ V_FUNCTION_BEGIN(__kernel_clock_gettime) bne cr6,75f /* CLOCK_MONOTONIC_COARSE */ - lwa r6,WTOM_CLOCK_SEC(r3) + ld r6,WTOM_CLOCK_SEC(r3) lwa r9,WTOM_CLOCK_NSEC(r3) /* check if counter has updated */ -- cgit From e60a582bcde01158a64ff948fb799f21f5d31a11 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 7 Mar 2019 11:09:19 +0100 Subject: mmc: pxamci: fix enum type confusion clang points out several instances of mismatched types in this drivers, all coming from a single declaration: drivers/mmc/host/pxamci.c:193:15: error: implicit conversion from enumeration type 'enum dma_transfer_direction' to different enumeration type 'enum dma_data_direction' [-Werror,-Wenum-conversion] direction = DMA_DEV_TO_MEM; ~ ^~~~~~~~~~~~~~ drivers/mmc/host/pxamci.c:212:62: error: implicit conversion from enumeration type 'enum dma_data_direction' to different enumeration type 'enum dma_transfer_direction' [-Werror,-Wenum-conversion] tx = dmaengine_prep_slave_sg(chan, data->sg, host->dma_len, direction, The behavior is correct, so this must be a simply typo from dma_data_direction and dma_transfer_direction being similarly named types with a similar purpose. Fixes: 6464b7140951 ("mmc: pxamci: switch over to dmaengine use") Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Acked-by: Robert Jarzmik Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/pxamci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/pxamci.c b/drivers/mmc/host/pxamci.c index c907bf502a12..c1d3f0e38921 100644 --- a/drivers/mmc/host/pxamci.c +++ b/drivers/mmc/host/pxamci.c @@ -162,7 +162,7 @@ static void pxamci_dma_irq(void *param); static void pxamci_setup_data(struct pxamci_host *host, struct mmc_data *data) { struct dma_async_tx_descriptor *tx; - enum dma_data_direction direction; + enum dma_transfer_direction direction; struct dma_slave_config config; struct dma_chan *chan; unsigned int nob = data->blocks; -- cgit From 9ce58dd7d9da3ca0d7cb8c9568f1c6f4746da65a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 7 Mar 2019 11:10:11 +0100 Subject: mmc: davinci: remove extraneous __init annotation Building with clang finds a mistaken __init tag: WARNING: vmlinux.o(.text+0x5e4250): Section mismatch in reference from the function davinci_mmcsd_probe() to the function .init.text:init_mmcsd_host() The function davinci_mmcsd_probe() references the function __init init_mmcsd_host(). This is often because davinci_mmcsd_probe lacks a __init annotation or the annotation of init_mmcsd_host is wrong. Signed-off-by: Arnd Bergmann Acked-by: Wolfram Sang Reviewed-by: Nathan Chancellor Signed-off-by: Ulf Hansson --- drivers/mmc/host/davinci_mmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c index 49e0daf2ef5e..f37003df1e01 100644 --- a/drivers/mmc/host/davinci_mmc.c +++ b/drivers/mmc/host/davinci_mmc.c @@ -1117,7 +1117,7 @@ static inline void mmc_davinci_cpufreq_deregister(struct mmc_davinci_host *host) { } #endif -static void __init init_mmcsd_host(struct mmc_davinci_host *host) +static void init_mmcsd_host(struct mmc_davinci_host *host) { mmc_davinci_reset_ctrl(host, 1); -- cgit From 4e50ce03976fbc8ae995a000c4b10c737467beaa Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Wed, 13 Mar 2019 10:03:17 +0100 Subject: iommu/amd: fix sg->dma_address for sg->offset bigger than PAGE_SIZE Take into account that sg->offset can be bigger than PAGE_SIZE when setting segment sg->dma_address. Otherwise sg->dma_address will point at diffrent page, what makes DMA not possible with erros like this: xhci_hcd 0000:38:00.3: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0000 address=0x00000000fdaa70c0 flags=0x0020] xhci_hcd 0000:38:00.3: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0000 address=0x00000000fdaa7040 flags=0x0020] xhci_hcd 0000:38:00.3: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0000 address=0x00000000fdaa7080 flags=0x0020] xhci_hcd 0000:38:00.3: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0000 address=0x00000000fdaa7100 flags=0x0020] xhci_hcd 0000:38:00.3: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0000 address=0x00000000fdaa7000 flags=0x0020] Additinally with wrong sg->dma_address unmap_sg will free wrong pages, what what can cause crashes like this: Feb 28 19:27:45 kernel: BUG: Bad page state in process cinnamon pfn:39e8b1 Feb 28 19:27:45 kernel: Disabling lock debugging due to kernel taint Feb 28 19:27:45 kernel: flags: 0x2ffff0000000000() Feb 28 19:27:45 kernel: raw: 02ffff0000000000 0000000000000000 ffffffff00000301 0000000000000000 Feb 28 19:27:45 kernel: raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000 Feb 28 19:27:45 kernel: page dumped because: nonzero _refcount Feb 28 19:27:45 kernel: Modules linked in: ccm fuse arc4 nct6775 hwmon_vid amdgpu nls_iso8859_1 nls_cp437 edac_mce_amd vfat fat kvm_amd ccp rng_core kvm mt76x0u mt76x0_common mt76x02_usb irqbypass mt76_usb mt76x02_lib mt76 crct10dif_pclmul crc32_pclmul chash mac80211 amd_iommu_v2 ghash_clmulni_intel gpu_sched i2c_algo_bit ttm wmi_bmof snd_hda_codec_realtek snd_hda_codec_generic drm_kms_helper snd_hda_codec_hdmi snd_hda_intel drm snd_hda_codec aesni_intel snd_hda_core snd_hwdep aes_x86_64 crypto_simd snd_pcm cfg80211 cryptd mousedev snd_timer glue_helper pcspkr r8169 input_leds realtek agpgart libphy rfkill snd syscopyarea sysfillrect sysimgblt fb_sys_fops soundcore sp5100_tco k10temp i2c_piix4 wmi evdev gpio_amdpt pinctrl_amd mac_hid pcc_cpufreq acpi_cpufreq sg ip_tables x_tables ext4(E) crc32c_generic(E) crc16(E) mbcache(E) jbd2(E) fscrypto(E) sd_mod(E) hid_generic(E) usbhid(E) hid(E) dm_mod(E) serio_raw(E) atkbd(E) libps2(E) crc32c_intel(E) ahci(E) libahci(E) libata(E) xhci_pci(E) xhci_hcd(E) Feb 28 19:27:45 kernel: scsi_mod(E) i8042(E) serio(E) bcache(E) crc64(E) Feb 28 19:27:45 kernel: CPU: 2 PID: 896 Comm: cinnamon Tainted: G B W E 4.20.12-arch1-1-custom #1 Feb 28 19:27:45 kernel: Hardware name: To Be Filled By O.E.M. To Be Filled By O.E.M./B450M Pro4, BIOS P1.20 06/26/2018 Feb 28 19:27:45 kernel: Call Trace: Feb 28 19:27:45 kernel: dump_stack+0x5c/0x80 Feb 28 19:27:45 kernel: bad_page.cold.29+0x7f/0xb2 Feb 28 19:27:45 kernel: __free_pages_ok+0x2c0/0x2d0 Feb 28 19:27:45 kernel: skb_release_data+0x96/0x180 Feb 28 19:27:45 kernel: __kfree_skb+0xe/0x20 Feb 28 19:27:45 kernel: tcp_recvmsg+0x894/0xc60 Feb 28 19:27:45 kernel: ? reuse_swap_page+0x120/0x340 Feb 28 19:27:45 kernel: ? ptep_set_access_flags+0x23/0x30 Feb 28 19:27:45 kernel: inet_recvmsg+0x5b/0x100 Feb 28 19:27:45 kernel: __sys_recvfrom+0xc3/0x180 Feb 28 19:27:45 kernel: ? handle_mm_fault+0x10a/0x250 Feb 28 19:27:45 kernel: ? syscall_trace_enter+0x1d3/0x2d0 Feb 28 19:27:45 kernel: ? __audit_syscall_exit+0x22a/0x290 Feb 28 19:27:45 kernel: __x64_sys_recvfrom+0x24/0x30 Feb 28 19:27:45 kernel: do_syscall_64+0x5b/0x170 Feb 28 19:27:45 kernel: entry_SYSCALL_64_after_hwframe+0x44/0xa9 Cc: stable@vger.kernel.org Reported-and-tested-by: Jan Viktorin Reviewed-by: Alexander Duyck Signed-off-by: Stanislaw Gruszka Fixes: 80187fd39dcb ('iommu/amd: Optimize map_sg and unmap_sg') Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index b319e51c379b..21cb088d6687 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2608,7 +2608,12 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, /* Everything is mapped - write the right values into s->dma_address */ for_each_sg(sglist, s, nelems, i) { - s->dma_address += address + s->offset; + /* + * Add in the remaining piece of the scatter-gather offset that + * was masked out when we were determining the physical address + * via (sg_phys(s) & PAGE_MASK) earlier. + */ + s->dma_address += address + (s->offset & ~PAGE_MASK); s->dma_length = s->length; } -- cgit From 721f1e6c1fd137e7e2053d8e103b666faaa2d50c Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela Date: Mon, 18 Mar 2019 13:45:43 +0100 Subject: ALSA: hda - add Lenovo IdeaCentre B550 to the power_save_blacklist Another machine which does not like the power saving (noise): https://bugzilla.redhat.com/show_bug.cgi?id=1689623 Also, reorder the Lenovo C50 entry to keep the table sorted. Reported-by: hs.guimaraes@outlook.com Signed-off-by: Jaroslav Kysela Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index e5c49003e75f..4f502c92061f 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2144,10 +2144,12 @@ static struct snd_pci_quirk power_save_blacklist[] = { SND_PCI_QUIRK(0x8086, 0x2057, "Intel NUC5i7RYB", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1520902 */ SND_PCI_QUIRK(0x8086, 0x2068, "Intel NUC7i3BNB", 0), - /* https://bugzilla.redhat.com/show_bug.cgi?id=1572975 */ - SND_PCI_QUIRK(0x17aa, 0x36a7, "Lenovo C50 All in one", 0), /* https://bugzilla.kernel.org/show_bug.cgi?id=198611 */ SND_PCI_QUIRK(0x17aa, 0x2227, "Lenovo X1 Carbon 3rd Gen", 0), + /* https://bugzilla.redhat.com/show_bug.cgi?id=1689623 */ + SND_PCI_QUIRK(0x17aa, 0x367b, "Lenovo IdeaCentre B550", 0), + /* https://bugzilla.redhat.com/show_bug.cgi?id=1572975 */ + SND_PCI_QUIRK(0x17aa, 0x36a7, "Lenovo C50 All in one", 0), {} }; #endif /* CONFIG_PM */ -- cgit From b4748e7ab731e436cf5db4786358ada5dd2db6dd Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 17 Mar 2019 23:21:24 +0000 Subject: ALSA: opl3: fix mismatch between snd_opl3_drum_switch definition and declaration The function snd_opl3_drum_switch declaration in the header file has the order of the two arguments on_off and vel swapped when compared to the definition arguments of vel and on_off. Fix this by swapping them around to match the definition. This error predates the git history, so no idea when this error was introduced. Signed-off-by: Colin Ian King Signed-off-by: Takashi Iwai --- sound/drivers/opl3/opl3_voice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/drivers/opl3/opl3_voice.h b/sound/drivers/opl3/opl3_voice.h index 5b02bd49fde4..4e4ecc21760b 100644 --- a/sound/drivers/opl3/opl3_voice.h +++ b/sound/drivers/opl3/opl3_voice.h @@ -41,7 +41,7 @@ void snd_opl3_timer_func(struct timer_list *t); /* Prototypes for opl3_drums.c */ void snd_opl3_load_drums(struct snd_opl3 *opl3); -void snd_opl3_drum_switch(struct snd_opl3 *opl3, int note, int on_off, int vel, struct snd_midi_channel *chan); +void snd_opl3_drum_switch(struct snd_opl3 *opl3, int note, int vel, int on_off, struct snd_midi_channel *chan); /* Prototypes for opl3_oss.c */ #if IS_ENABLED(CONFIG_SND_SEQUENCER_OSS) -- cgit From 4622a2d43101ea2e3d54a2af090f25a5886c648b Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Mon, 11 Mar 2019 08:30:27 +0000 Subject: powerpc/6xx: fix setup and use of SPRN_SPRG_PGDIR for hash32 Not only the 603 but all 6xx need SPRN_SPRG_PGDIR to be initialised at startup. This patch move it from __setup_cpu_603() to start_here() and __secondary_start(), close to the initialisation of SPRN_THREAD. Previously, virt addr of PGDIR was retrieved from thread struct. Now that it is the phys addr which is stored in SPRN_SPRG_PGDIR, hash_page() shall not convert it to phys anymore. This patch removes the conversion. Fixes: 93c4a162b014 ("powerpc/6xx: Store PGDIR physical address in a SPRG") Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/cpu_setup_6xx.S | 3 --- arch/powerpc/kernel/head_32.S | 6 ++++++ arch/powerpc/mm/hash_low_32.S | 8 ++++---- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S index 6f1c11e0691f..7534ecff5e92 100644 --- a/arch/powerpc/kernel/cpu_setup_6xx.S +++ b/arch/powerpc/kernel/cpu_setup_6xx.S @@ -24,9 +24,6 @@ BEGIN_MMU_FTR_SECTION li r10,0 mtspr SPRN_SPRG_603_LRU,r10 /* init SW LRU tracking */ END_MMU_FTR_SECTION_IFSET(MMU_FTR_NEED_DTLB_SW_LRU) - lis r10, (swapper_pg_dir - PAGE_OFFSET)@h - ori r10, r10, (swapper_pg_dir - PAGE_OFFSET)@l - mtspr SPRN_SPRG_PGDIR, r10 BEGIN_FTR_SECTION bl __init_fpu_registers diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S index ce6a972f2584..48051c8977c5 100644 --- a/arch/powerpc/kernel/head_32.S +++ b/arch/powerpc/kernel/head_32.S @@ -855,6 +855,9 @@ __secondary_start: li r3,0 stw r3, RTAS_SP(r4) /* 0 => not in RTAS */ #endif + lis r4, (swapper_pg_dir - PAGE_OFFSET)@h + ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l + mtspr SPRN_SPRG_PGDIR, r4 /* enable MMU and jump to start_secondary */ li r4,MSR_KERNEL @@ -942,6 +945,9 @@ start_here: li r3,0 stw r3, RTAS_SP(r4) /* 0 => not in RTAS */ #endif + lis r4, (swapper_pg_dir - PAGE_OFFSET)@h + ori r4, r4, (swapper_pg_dir - PAGE_OFFSET)@l + mtspr SPRN_SPRG_PGDIR, r4 /* stack */ lis r1,init_thread_union@ha diff --git a/arch/powerpc/mm/hash_low_32.S b/arch/powerpc/mm/hash_low_32.S index 1f13494efb2b..a6c491f18a04 100644 --- a/arch/powerpc/mm/hash_low_32.S +++ b/arch/powerpc/mm/hash_low_32.S @@ -70,12 +70,12 @@ _GLOBAL(hash_page) lis r0,KERNELBASE@h /* check if kernel address */ cmplw 0,r4,r0 ori r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */ - mfspr r5, SPRN_SPRG_PGDIR /* virt page-table root */ + mfspr r5, SPRN_SPRG_PGDIR /* phys page-table root */ blt+ 112f /* assume user more likely */ - lis r5,swapper_pg_dir@ha /* if kernel address, use */ - addi r5,r5,swapper_pg_dir@l /* kernel page table */ + lis r5, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */ + addi r5 ,r5 ,(swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */ rlwimi r3,r9,32-12,29,29 /* MSR_PR -> _PAGE_USER */ -112: tophys(r5, r5) +112: #ifndef CONFIG_PTE_64BIT rlwimi r5,r4,12,20,29 /* insert top 10 bits of address */ lwz r8,0(r5) /* get pmd entry */ -- cgit From 6ce59025f1182125e75c8d121daf44056b65dd1f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Mar 2019 08:08:43 -0600 Subject: paride/pf: cleanup queues when detection fails The driver allocates queues for all the units it potentially supports. But if we fail to detect any drives, then we fail loading the module without cleaning up those queues. This is now evident with the switch to blk-mq, though the bug has been there forever as far as I can tell. Also fix cleanup through regular module exit. Reported-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Jens Axboe --- drivers/block/paride/pf.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/block/paride/pf.c b/drivers/block/paride/pf.c index e92e7a8eeeb2..103b617cdc31 100644 --- a/drivers/block/paride/pf.c +++ b/drivers/block/paride/pf.c @@ -761,8 +761,12 @@ static int pf_detect(void) return 0; printk("%s: No ATAPI disk detected\n", name); - for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) + for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { + blk_cleanup_queue(pf->disk->queue); + pf->disk->queue = NULL; + blk_mq_free_tag_set(&pf->tag_set); put_disk(pf->disk); + } pi_unregister_driver(par_drv); return -1; } @@ -1047,13 +1051,15 @@ static void __exit pf_exit(void) int unit; unregister_blkdev(major, name); for (pf = units, unit = 0; unit < PF_UNITS; pf++, unit++) { - if (!pf->present) - continue; - del_gendisk(pf->disk); + if (pf->present) + del_gendisk(pf->disk); + blk_cleanup_queue(pf->disk->queue); blk_mq_free_tag_set(&pf->tag_set); put_disk(pf->disk); - pi_release(pf->pi); + + if (pf->present) + pi_release(pf->pi); } } -- cgit From 81b74ac68c28fddb3589ad5d4d5e587baf4bb781 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Mar 2019 08:10:32 -0600 Subject: paride/pcd: cleanup queues when detection fails The driver allocates queues for all the units it potentially supports. But if we fail to detect any drives, then we fail loading the module without cleaning up those queues. This is now evident with the switch to blk-mq, though the bug has been there forever as far as I can tell. Also fix cleanup through regular module exit. Reported-by: Randy Dunlap Tested-by: Randy Dunlap Signed-off-by: Jens Axboe --- drivers/block/paride/pcd.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index 96670eefaeb2..377a694dc228 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -749,8 +749,12 @@ static int pcd_detect(void) return 0; printk("%s: No CD-ROM drive found\n", name); - for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) + for (unit = 0, cd = pcd; unit < PCD_UNITS; unit++, cd++) { + blk_cleanup_queue(cd->disk->queue); + cd->disk->queue = NULL; + blk_mq_free_tag_set(&cd->tag_set); put_disk(cd->disk); + } pi_unregister_driver(par_drv); return -1; } -- cgit From aa36e3616532f82a920b5ebf4e059fbafae63d88 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Sat, 19 Jan 2019 17:15:23 +0100 Subject: thermal/intel_powerclamp: fix __percpu declaration of worker_data This variable is declared as: static struct powerclamp_worker_data * __percpu worker_data; In other words, a percpu pointer to struct ... But this variable not used like so but as a pointer to a percpu struct powerclamp_worker_data. So fix the declaration as: static struct powerclamp_worker_data __percpu *worker_data; This also quiets Sparse's warnings from __verify_pcpu_ptr(), like: 494:49: warning: incorrect type in initializer (different address spaces) 494:49: expected void const [noderef] *__vpp_verify 494:49: got struct powerclamp_worker_data * Signed-off-by: Luc Van Oostenryck Reviewed-by: Petr Mladek Signed-off-by: Zhang Rui --- drivers/thermal/intel/intel_powerclamp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 7571f7c2e7c9..b12ecd436e23 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -101,7 +101,7 @@ struct powerclamp_worker_data { bool clamping; }; -static struct powerclamp_worker_data * __percpu worker_data; +static struct powerclamp_worker_data __percpu *worker_data; static struct thermal_cooling_device *cooling_dev; static unsigned long *cpu_clamping_mask; /* bit map for tracking per cpu * clamping kthread worker -- cgit From 3b5236cc5d086dd3ddd01113ee9255421aab9fab Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 22 Jan 2019 16:47:41 +0100 Subject: thermal: samsung: Fix incorrect check after code merge Merge commit 19785cf93b6c ("Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal") broke the code introduced by commit ffe6e16f14fa ("thermal: exynos: Reduce severity of too early temperature read"). Restore the original code from the mentioned commit to finally fix the warning message during boot: thermal thermal_zone0: failed to read out thermal zone (-22) Reported-by: Marian Mihailescu Signed-off-by: Marek Szyprowski Fixes: 19785cf93b6c ("Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/evalenti/linux-soc-thermal") Reviewed-by: Krzysztof Kozlowski Signed-off-by: Zhang Rui --- drivers/thermal/samsung/exynos_tmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index 48eef552cba4..fc9399d9c082 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -666,7 +666,7 @@ static int exynos_get_temp(void *p, int *temp) struct exynos_tmu_data *data = p; int value, ret = 0; - if (!data || !data->tmu_read || !data->enabled) + if (!data || !data->tmu_read) return -EINVAL; else if (!data->enabled) /* -- cgit From 35122495a8c6683e863acf7b05a7036b2be64c7a Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Tue, 29 Jan 2019 09:55:57 +0000 Subject: thermal: bcm2835: Fix crash in bcm2835_thermal_debugfs "cat /sys/kernel/debug/bcm2835_thermal/regset" causes a NULL pointer dereference in bcm2835_thermal_debugfs. The driver makes use of the implementation details of the thermal framework to retrieve a pointer to its private data from a struct thermal_zone_device, and gets it wrong - leading to the crash. Instead, store its private data as the drvdata and retrieve the thermal_zone_device pointer from it. Fixes: bcb7dd9ef206 ("thermal: bcm2835: add thermal driver for bcm2835 SoC") Signed-off-by: Phil Elwell Signed-off-by: Zhang Rui --- drivers/thermal/broadcom/bcm2835_thermal.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/thermal/broadcom/bcm2835_thermal.c b/drivers/thermal/broadcom/bcm2835_thermal.c index 720760cd493f..ba39647a690c 100644 --- a/drivers/thermal/broadcom/bcm2835_thermal.c +++ b/drivers/thermal/broadcom/bcm2835_thermal.c @@ -119,8 +119,7 @@ static const struct debugfs_reg32 bcm2835_thermal_regs[] = { static void bcm2835_thermal_debugfs(struct platform_device *pdev) { - struct thermal_zone_device *tz = platform_get_drvdata(pdev); - struct bcm2835_thermal_data *data = tz->devdata; + struct bcm2835_thermal_data *data = platform_get_drvdata(pdev); struct debugfs_regset32 *regset; data->debugfsdir = debugfs_create_dir("bcm2835_thermal", NULL); @@ -266,7 +265,7 @@ static int bcm2835_thermal_probe(struct platform_device *pdev) data->tz = tz; - platform_set_drvdata(pdev, tz); + platform_set_drvdata(pdev, data); /* * Thermal_zone doesn't enable hwmon as default, @@ -290,8 +289,8 @@ err_clk: static int bcm2835_thermal_remove(struct platform_device *pdev) { - struct thermal_zone_device *tz = platform_get_drvdata(pdev); - struct bcm2835_thermal_data *data = tz->devdata; + struct bcm2835_thermal_data *data = platform_get_drvdata(pdev); + struct thermal_zone_device *tz = data->tz; debugfs_remove_recursive(data->debugfsdir); thermal_zone_of_sensor_unregister(&pdev->dev, tz); -- cgit From e0fda7377d30685feaef4d93d9fdfde91c5d7d9a Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Mon, 18 Feb 2019 14:22:30 +0800 Subject: thermal: cpu_cooling: Remove unused cur_freq variable The 'cur_freq' local variable became unused after commit 84fe2cab4859 ("cpu_cooling: Drop static-power related stuff"), let's remove it. Cc: Amit Daniel Kachhap Cc: Viresh Kumar Cc: Javi Merino Cc: Zhang Rui Cc: Eduardo Valentin Cc: Daniel Lezcano Signed-off-by: Shaokun Zhang Acked-by: Viresh Kumar Signed-off-by: Zhang Rui --- drivers/thermal/cpu_cooling.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index 6fff16113628..f7c1f49ec87f 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -536,12 +536,11 @@ static int cpufreq_power2state(struct thermal_cooling_device *cdev, struct thermal_zone_device *tz, u32 power, unsigned long *state) { - unsigned int cur_freq, target_freq; + unsigned int target_freq; u32 last_load, normalised_power; struct cpufreq_cooling_device *cpufreq_cdev = cdev->devdata; struct cpufreq_policy *policy = cpufreq_cdev->policy; - cur_freq = cpufreq_quick_get(policy->cpu); power = power > 0 ? power : 0; last_load = cpufreq_cdev->last_load ?: 1; normalised_power = (power * 100) / last_load; -- cgit From 16fc8eca1975358111dbd7ce65e4ce42d1a848fb Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Wed, 10 Oct 2018 01:30:06 -0700 Subject: thermal/int340x_thermal: Add additional UUIDs Add more supported DPTF policies than the driver currently exposes. Signed-off-by: Matthew Garrett Cc: Nisha Aram Signed-off-by: Zhang Rui --- drivers/thermal/intel/int340x_thermal/int3400_thermal.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c index 61ca7ce3624e..e0f39cacbc18 100644 --- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c @@ -22,6 +22,13 @@ enum int3400_thermal_uuid { INT3400_THERMAL_PASSIVE_1, INT3400_THERMAL_ACTIVE, INT3400_THERMAL_CRITICAL, + INT3400_THERMAL_ADAPTIVE_PERFORMANCE, + INT3400_THERMAL_EMERGENCY_CALL_MODE, + INT3400_THERMAL_PASSIVE_2, + INT3400_THERMAL_POWER_BOSS, + INT3400_THERMAL_VIRTUAL_SENSOR, + INT3400_THERMAL_COOLING_MODE, + INT3400_THERMAL_HARDWARE_DUTY_CYCLING, INT3400_THERMAL_MAXIMUM_UUID, }; @@ -29,6 +36,13 @@ static char *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = { "42A441D6-AE6A-462b-A84B-4A8CE79027D3", "3A95C389-E4B8-4629-A526-C52C88626BAE", "97C68AE7-15FA-499c-B8C9-5DA81D606E0A", + "63BE270F-1C11-48FD-A6F7-3AF253FF3E2D", + "5349962F-71E6-431D-9AE8-0A635B710AEE", + "9E04115A-AE87-4D1C-9500-0F3E340BFE75", + "F5A35014-C209-46A4-993A-EB56DE7530A1", + "6ED722A7-9240-48A5-B479-31EEF723D7CF", + "16CAF1B7-DD38-40ED-B1C1-1B8A1913D531", + "BE84BABF-C4D4-403D-B495-3128FD44dAC1", }; struct int3400_thermal_priv { -- cgit From 396ee4d0cd52c13b3f6421b8d324d65da5e7e409 Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Wed, 10 Oct 2018 01:30:07 -0700 Subject: thermal/int340x_thermal: fix mode setting int3400 only pushes the UUID into the firmware when the mode is flipped to "enable". The current code only exposes the mode flag if the firmware supports the PASSIVE_1 UUID, which not all machines do. Remove the restriction. Signed-off-by: Matthew Garrett Signed-off-by: Zhang Rui --- drivers/thermal/intel/int340x_thermal/int3400_thermal.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c index e0f39cacbc18..5f3ed24e26ec 100644 --- a/drivers/thermal/intel/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/intel/int340x_thermal/int3400_thermal.c @@ -313,10 +313,9 @@ static int int3400_thermal_probe(struct platform_device *pdev) platform_set_drvdata(pdev, priv); - if (priv->uuid_bitmap & 1 << INT3400_THERMAL_PASSIVE_1) { - int3400_thermal_ops.get_mode = int3400_thermal_get_mode; - int3400_thermal_ops.set_mode = int3400_thermal_set_mode; - } + int3400_thermal_ops.get_mode = int3400_thermal_get_mode; + int3400_thermal_ops.set_mode = int3400_thermal_set_mode; + priv->thermal = thermal_zone_device_register("INT3400 Thermal", 0, 0, priv, &int3400_thermal_ops, &int3400_thermal_params, 0, 0); -- cgit From 684b73245cd4d2608f4f2214f6bff02ba6ceca5f Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Fri, 15 Mar 2019 11:05:10 +0800 Subject: blk-mq: use blk_mq_sched_mark_restart_hctx to set RESTART Let blk_mq_mark_tag_wait() use the blk_mq_sched_mark_restart_hctx() to set BLK_MQ_S_SCHED_RESTART. Signed-off-by: Yufen Yu Signed-off-by: Jens Axboe --- block/blk-mq.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index a9c181603cbd..ea01c23b58a3 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1093,8 +1093,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, bool ret; if (!(hctx->flags & BLK_MQ_F_TAG_SHARED)) { - if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) - set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state); + blk_mq_sched_mark_restart_hctx(hctx); /* * It's possible that a tag was freed in the window between the -- cgit From f7c8a4120eedf24c36090b7542b179ff7a649219 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Mon, 18 Mar 2019 20:23:17 +0800 Subject: loop: access lo_backing_file only when the loop device is Lo_bound Commit 758a58d0bc67 ("loop: set GENHD_FL_NO_PART_SCAN after blkdev_reread_part()") separates "lo->lo_backing_file = NULL" and "lo->lo_state = Lo_unbound" into different critical regions protected by loop_ctl_mutex. However, there is below race that the NULL lo->lo_backing_file would be accessed when the backend of a loop is another loop device, e.g., loop0's backend is a file, while loop1's backend is loop0. loop0's backend is file loop1's backend is loop0 __loop_clr_fd() mutex_lock(&loop_ctl_mutex); lo->lo_backing_file = NULL; --> set to NULL mutex_unlock(&loop_ctl_mutex); loop_set_fd() mutex_lock_killable(&loop_ctl_mutex); loop_validate_file() f = l->lo_backing_file; --> NULL access if loop0 is not Lo_unbound mutex_lock(&loop_ctl_mutex); lo->lo_state = Lo_unbound; mutex_unlock(&loop_ctl_mutex); lo->lo_backing_file should be accessed only when the loop device is Lo_bound. In fact, the problem has been introduced already in commit 7ccd0791d985 ("loop: Push loop_ctl_mutex down into loop_clr_fd()") after which loop_validate_file() could see devices in Lo_rundown state with which it did not count. It was harmless at that point but still. Fixes: 7ccd0791d985 ("loop: Push loop_ctl_mutex down into loop_clr_fd()") Reported-by: syzbot+9bdc1adc1c55e7fe765b@syzkaller.appspotmail.com Signed-off-by: Dongli Zhang Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- drivers/block/loop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 1e6edd568214..bf1c61cab8eb 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -656,7 +656,7 @@ static int loop_validate_file(struct file *file, struct block_device *bdev) return -EBADF; l = f->f_mapping->host->i_bdev->bd_disk->private_data; - if (l->lo_state == Lo_unbound) { + if (l->lo_state != Lo_bound) { return -EINVAL; } f = l->lo_backing_file; -- cgit From fb4d83f293e072ad96848959d20664e2a9f2235b Mon Sep 17 00:00:00 2001 From: Pi-Hsun Shih Date: Wed, 9 Jan 2019 13:57:24 +0800 Subject: thermal: mtk: Allocate enough space for mtk_thermal. The mtk_thermal struct contains a 'struct mtk_thermal_bank banks[];', but the allocation only allocates sizeof(struct mtk_thermal) bytes, which cause out of bound access with the ->banks[] member. Change it to a fixed size array instead. Signed-off-by: Pi-Hsun Shih Reviewed-by: Daniel Lezcano Signed-off-by: Zhang Rui --- drivers/thermal/mtk_thermal.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/thermal/mtk_thermal.c b/drivers/thermal/mtk_thermal.c index 5c07a61447d3..e4ea7f6aef20 100644 --- a/drivers/thermal/mtk_thermal.c +++ b/drivers/thermal/mtk_thermal.c @@ -199,6 +199,9 @@ enum { #define MT7622_TS1 0 #define MT7622_NUM_CONTROLLER 1 +/* The maximum number of banks */ +#define MAX_NUM_ZONES 8 + /* The calibration coefficient of sensor */ #define MT7622_CALIBRATION 165 @@ -249,7 +252,7 @@ struct mtk_thermal_data { const int num_controller; const int *controller_offset; bool need_switch_bank; - struct thermal_bank_cfg bank_data[]; + struct thermal_bank_cfg bank_data[MAX_NUM_ZONES]; }; struct mtk_thermal { @@ -268,7 +271,7 @@ struct mtk_thermal { s32 vts[MAX_NUM_VTS]; const struct mtk_thermal_data *conf; - struct mtk_thermal_bank banks[]; + struct mtk_thermal_bank banks[MAX_NUM_ZONES]; }; /* MT8183 thermal sensor data */ -- cgit From e925b5be5751f6a7286bbd9a4cbbc4ac90cc5fa6 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 18 Mar 2019 22:26:33 +0800 Subject: thermal/intel_powerclamp: fix truncated kthread name kthread name only allows 15 characters (TASK_COMMON_LEN is 16). Thus rename the kthreads created by intel_powerclamp driver from "kidle_inject/ + decimal cpuid" to "kidle_inj/ + decimal cpuid" to avoid truncated kthead name for cpu 100 and later. Signed-off-by: Zhang Rui --- drivers/thermal/intel/intel_powerclamp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/intel/intel_powerclamp.c b/drivers/thermal/intel/intel_powerclamp.c index 7571f7c2e7c9..9434f4eb421a 100644 --- a/drivers/thermal/intel/intel_powerclamp.c +++ b/drivers/thermal/intel/intel_powerclamp.c @@ -494,7 +494,7 @@ static void start_power_clamp_worker(unsigned long cpu) struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu); struct kthread_worker *worker; - worker = kthread_create_worker_on_cpu(cpu, 0, "kidle_inject/%ld", cpu); + worker = kthread_create_worker_on_cpu(cpu, 0, "kidle_inj/%ld", cpu); if (IS_ERR(worker)) return; -- cgit From 29b0b5d56589d66bd5793f1e09211ce7d7d3cd36 Mon Sep 17 00:00:00 2001 From: Alin Nastac Date: Mon, 11 Mar 2019 17:18:42 +0100 Subject: netfilter: nf_conntrack_sip: remove direct dependency on IPv6 Previous implementation was not usable with CONFIG_IPV6=m. Fixes: a3419ce3356c ("netfilter: nf_conntrack_sip: add sip_external_media logic") Signed-off-by: Alin Nastac Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_sip.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index f067c6b50857..39fcc1ed18f3 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -20,9 +20,9 @@ #include #include #include +#include +#include -#include -#include #include #include #include @@ -871,38 +871,33 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, } else if (sip_external_media) { struct net_device *dev = skb_dst(skb)->dev; struct net *net = dev_net(dev); - struct rtable *rt; - struct flowi4 fl4 = {}; -#if IS_ENABLED(CONFIG_IPV6) - struct flowi6 fl6 = {}; -#endif + struct flowi fl; struct dst_entry *dst = NULL; + memset(&fl, 0, sizeof(fl)); + switch (nf_ct_l3num(ct)) { case NFPROTO_IPV4: - fl4.daddr = daddr->ip; - rt = ip_route_output_key(net, &fl4); - if (!IS_ERR(rt)) - dst = &rt->dst; + fl.u.ip4.daddr = daddr->ip; + nf_ip_route(net, &dst, &fl, false); break; -#if IS_ENABLED(CONFIG_IPV6) case NFPROTO_IPV6: - fl6.daddr = daddr->in6; - dst = ip6_route_output(net, NULL, &fl6); - if (dst->error) { - dst_release(dst); - dst = NULL; - } + fl.u.ip6.daddr = daddr->in6; + nf_ip6_route(net, &dst, &fl, false); break; -#endif } /* Don't predict any conntracks when media endpoint is reachable * through the same interface as the signalling peer. */ - if (dst && dst->dev == dev) - return NF_ACCEPT; + if (dst) { + bool external_media = (dst->dev == dev); + + dst_release(dst); + if (external_media) + return NF_ACCEPT; + } } /* We need to check whether the registration exists before attempting -- cgit From 05b7639da55f5555b9866a1f4b7e8995232a6323 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 12 Mar 2019 12:10:59 +0100 Subject: netfilter: nft_set_rbtree: check for inactive element after flag mismatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise, we hit bogus ENOENT when removing elements. Fixes: e701001e7cbe ("netfilter: nft_rbtree: allow adjacent intervals with dynamic updates") Reported-by: Václav Zindulka Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_set_rbtree.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index fa61208371f8..321a0036fdf5 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -308,10 +308,6 @@ static void *nft_rbtree_deactivate(const struct net *net, else if (d > 0) parent = parent->rb_right; else { - if (!nft_set_elem_active(&rbe->ext, genmask)) { - parent = parent->rb_left; - continue; - } if (nft_rbtree_interval_end(rbe) && !nft_rbtree_interval_end(this)) { parent = parent->rb_left; @@ -320,6 +316,9 @@ static void *nft_rbtree_deactivate(const struct net *net, nft_rbtree_interval_end(this)) { parent = parent->rb_right; continue; + } else if (!nft_set_elem_active(&rbe->ext, genmask)) { + parent = parent->rb_left; + continue; } nft_rbtree_flush(net, set, rbe); return rbe; -- cgit From e166e4fdaced850bee3d5ee12a5740258fb30587 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 13 Mar 2019 16:33:29 +0800 Subject: netfilter: bridge: set skb transport_header before entering NF_INET_PRE_ROUTING Since Commit 21d1196a35f5 ("ipv4: set transport header earlier"), skb->transport_header has been always set before entering INET netfilter. This patch is to set skb->transport_header for bridge before entering INET netfilter by bridge-nf-call-iptables. It also fixes an issue that sctp_error() couldn't compute a right csum due to unset skb->transport_header. Fixes: e6d8b64b34aa ("net: sctp: fix and consolidate SCTP checksumming code") Reported-by: Li Shuang Suggested-by: Pablo Neira Ayuso Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/br_netfilter_hooks.c | 1 + net/bridge/br_netfilter_ipv6.c | 2 ++ 2 files changed, 3 insertions(+) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 9d34de68571b..22afa566cbce 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -502,6 +502,7 @@ static unsigned int br_nf_pre_routing(void *priv, nf_bridge->ipv4_daddr = ip_hdr(skb)->daddr; skb->protocol = htons(ETH_P_IP); + skb->transport_header = skb->network_header + ip_hdr(skb)->ihl * 4; NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->net, state->sk, skb, skb->dev, NULL, diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 564710f88f93..e88d6641647b 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -235,6 +235,8 @@ unsigned int br_nf_pre_routing_ipv6(void *priv, nf_bridge->ipv6_daddr = ipv6_hdr(skb)->daddr; skb->protocol = htons(ETH_P_IPV6); + skb->transport_header = skb->network_header + sizeof(struct ipv6hdr); + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->net, state->sk, skb, skb->dev, NULL, br_nf_pre_routing_finish_ipv6); -- cgit From d1fa381033eb718df5c602f64b6e88676138dfc6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 13 Mar 2019 22:15:59 +0100 Subject: netfilter: fix NETFILTER_XT_TARGET_TEE dependencies MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With NETFILTER_XT_TARGET_TEE=y and IP6_NF_IPTABLES=m, we get a link error when referencing the NF_DUP_IPV6 module: net/netfilter/xt_TEE.o: In function `tee_tg6': xt_TEE.c:(.text+0x14): undefined reference to `nf_dup_ipv6' The problem here is the 'select NF_DUP_IPV6 if IP6_NF_IPTABLES' that forces NF_DUP_IPV6 to be =m as well rather than setting it to =y as was intended here. Adding a soft dependency on IP6_NF_IPTABLES avoids that broken configuration. Fixes: 5d400a4933e8 ("netfilter: Kconfig: Change select IPv6 dependencies") Cc: Máté Eckl Cc: Taehee Yoo Link: https://patchwork.ozlabs.org/patch/999498/ Link: https://lore.kernel.org/patchwork/patch/960062/ Reported-by: Randy Dunlap Reported-by: Stephen Rothwell Signed-off-by: Arnd Bergmann Signed-off-by: Pablo Neira Ayuso --- net/netfilter/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index d43ffb09939b..6548271209a0 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -1007,6 +1007,7 @@ config NETFILTER_XT_TARGET_TEE depends on NETFILTER_ADVANCED depends on IPV6 || IPV6=n depends on !NF_CONNTRACK || NF_CONNTRACK + depends on IP6_NF_IPTABLES || !IP6_NF_IPTABLES select NF_DUP_IPV4 select NF_DUP_IPV6 if IP6_NF_IPTABLES ---help--- -- cgit From 6d65561f3d5ec933151939c543d006b79044e7a6 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 14 Mar 2019 02:58:18 -0500 Subject: netfilter: ip6t_srh: fix NULL pointer dereferences skb_header_pointer may return NULL. The current code dereference its return values without a NULL check. The fix inserts the checks to avoid NULL pointer dereferences. Fixes: 202a8ff545cc ("netfilter: add IPv6 segment routing header 'srh' match") Signed-off-by: Kangjie Lu Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/ip6t_srh.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/ipv6/netfilter/ip6t_srh.c b/net/ipv6/netfilter/ip6t_srh.c index 1059894a6f4c..4cb83fb69844 100644 --- a/net/ipv6/netfilter/ip6t_srh.c +++ b/net/ipv6/netfilter/ip6t_srh.c @@ -210,6 +210,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par) psidoff = srhoff + sizeof(struct ipv6_sr_hdr) + ((srh->segments_left + 1) * sizeof(struct in6_addr)); psid = skb_header_pointer(skb, psidoff, sizeof(_psid), &_psid); + if (!psid) + return false; if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_PSID, ipv6_masked_addr_cmp(psid, &srhinfo->psid_msk, &srhinfo->psid_addr))) @@ -223,6 +225,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par) nsidoff = srhoff + sizeof(struct ipv6_sr_hdr) + ((srh->segments_left - 1) * sizeof(struct in6_addr)); nsid = skb_header_pointer(skb, nsidoff, sizeof(_nsid), &_nsid); + if (!nsid) + return false; if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_NSID, ipv6_masked_addr_cmp(nsid, &srhinfo->nsid_msk, &srhinfo->nsid_addr))) @@ -233,6 +237,8 @@ static bool srh1_mt6(const struct sk_buff *skb, struct xt_action_param *par) if (srhinfo->mt_flags & IP6T_SRH_LSID) { lsidoff = srhoff + sizeof(struct ipv6_sr_hdr); lsid = skb_header_pointer(skb, lsidoff, sizeof(_lsid), &_lsid); + if (!lsid) + return false; if (NF_SRH_INVF(srhinfo, IP6T_SRH_INV_LSID, ipv6_masked_addr_cmp(lsid, &srhinfo->lsid_msk, &srhinfo->lsid_addr))) -- cgit From 8ffcd32f64633926163cdd07a7d295c500a947d1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 14 Mar 2019 10:50:20 +0100 Subject: netfilter: nf_tables: bogus EBUSY in helper removal from transaction Proper use counter updates when activating and deactivating the object, otherwise, this hits bogus EBUSY error. Fixes: cd5125d8f518 ("netfilter: nf_tables: split set destruction in deactivate and destroy phase") Reported-by: Laura Garcia Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_objref.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 457a9ceb46af..8dfa798ea683 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -65,21 +65,34 @@ nla_put_failure: return -1; } -static void nft_objref_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) +static void nft_objref_deactivate(const struct nft_ctx *ctx, + const struct nft_expr *expr, + enum nft_trans_phase phase) { struct nft_object *obj = nft_objref_priv(expr); + if (phase == NFT_TRANS_COMMIT) + return; + obj->use--; } +static void nft_objref_activate(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_object *obj = nft_objref_priv(expr); + + obj->use++; +} + static struct nft_expr_type nft_objref_type; static const struct nft_expr_ops nft_objref_ops = { .type = &nft_objref_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_object *)), .eval = nft_objref_eval, .init = nft_objref_init, - .destroy = nft_objref_destroy, + .activate = nft_objref_activate, + .deactivate = nft_objref_deactivate, .dump = nft_objref_dump, }; -- cgit From 74710e05906c37da6b436386dc13c44dbe5d8308 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 15 Mar 2019 17:21:01 +0100 Subject: netfilter: nft_redir: fix module autoload with ip4 AF_INET4 does not exist. Fixes: c78efc99c750 ("netfilter: nf_tables: nat: merge nft_redir protocol specific modules)" Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_redir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index f8092926f704..a340cd8a751b 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -233,5 +233,5 @@ module_exit(nft_redir_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Arturo Borrero Gonzalez "); -MODULE_ALIAS_NFT_AF_EXPR(AF_INET4, "redir"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "redir"); MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "redir"); -- cgit From d3ca4651d05c0ff7259d087d8c949bcf3e14fb46 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 11 Mar 2019 15:04:18 +0100 Subject: udf: Fix crash on IO error during truncate When truncate(2) hits IO error when reading indirect extent block the code just bugs with: kernel BUG at linux-4.15.0/fs/udf/truncate.c:249! ... Fix the problem by bailing out cleanly in case of IO error. CC: stable@vger.kernel.org Reported-by: jean-luc malet Signed-off-by: Jan Kara --- fs/udf/truncate.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index b647f0bd150c..94220ba85628 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -260,6 +260,9 @@ void udf_truncate_extents(struct inode *inode) epos.block = eloc; epos.bh = udf_tread(sb, udf_get_lb_pblock(sb, &eloc, 0)); + /* Error reading indirect block? */ + if (!epos.bh) + return; if (elen) indirect_ext_len = (elen + sb->s_blocksize - 1) >> -- cgit From 2b42be5eb24564227b15e66f54f088e5a26549c7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 11 Mar 2019 15:27:02 +0100 Subject: udf: Propagate errors from udf_truncate_extents() Make udf_truncate_extents() properly propagate errors to its callers and let udf_setsize() handle the error properly as well. This lets userspace know in case there's some error when truncating blocks. Signed-off-by: Jan Kara --- fs/udf/inode.c | 4 +++- fs/udf/truncate.c | 7 ++++--- fs/udf/udfdecl.h | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index ae796e10f68b..e7276932e433 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1242,8 +1242,10 @@ set_size: truncate_setsize(inode, newsize); down_write(&iinfo->i_data_sem); udf_clear_extent_cache(inode); - udf_truncate_extents(inode); + err = udf_truncate_extents(inode); up_write(&iinfo->i_data_sem); + if (err) + return err; } update_time: inode->i_mtime = inode->i_ctime = current_time(inode); diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c index 94220ba85628..63a47f1e1d52 100644 --- a/fs/udf/truncate.c +++ b/fs/udf/truncate.c @@ -199,7 +199,7 @@ static void udf_update_alloc_ext_desc(struct inode *inode, * for making file shorter. For making file longer, udf_extend_file() has to * be used. */ -void udf_truncate_extents(struct inode *inode) +int udf_truncate_extents(struct inode *inode) { struct extent_position epos; struct kernel_lb_addr eloc, neloc = {}; @@ -224,7 +224,7 @@ void udf_truncate_extents(struct inode *inode) if (etype == -1) { /* We should extend the file? */ WARN_ON(byte_offset); - return; + return 0; } epos.offset -= adsize; extent_trunc(inode, &epos, &eloc, etype, elen, byte_offset); @@ -262,7 +262,7 @@ void udf_truncate_extents(struct inode *inode) udf_get_lb_pblock(sb, &eloc, 0)); /* Error reading indirect block? */ if (!epos.bh) - return; + return -EIO; if (elen) indirect_ext_len = (elen + sb->s_blocksize - 1) >> @@ -286,4 +286,5 @@ void udf_truncate_extents(struct inode *inode) iinfo->i_lenExtents = inode->i_size; brelse(epos.bh); + return 0; } diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h index ee246769dee4..d89ef71887fc 100644 --- a/fs/udf/udfdecl.h +++ b/fs/udf/udfdecl.h @@ -235,7 +235,7 @@ extern struct inode *udf_new_inode(struct inode *, umode_t); /* truncate.c */ extern void udf_truncate_tail_extent(struct inode *); extern void udf_discard_prealloc(struct inode *); -extern void udf_truncate_extents(struct inode *); +extern int udf_truncate_extents(struct inode *); /* balloc.c */ extern void udf_free_blocks(struct super_block *, struct inode *, -- cgit From f01a7dbe98ae4265023fa5d3af0f076f0b18a647 Mon Sep 17 00:00:00 2001 From: Martynas Pumputis Date: Mon, 18 Mar 2019 16:10:26 +0100 Subject: bpf: Try harder when allocating memory for large maps It has been observed that sometimes a higher order memory allocation for BPF maps fails when there is no obvious memory pressure in a system. E.g. the map (BPF_MAP_TYPE_LRU_HASH, key=38, value=56, max_elems=524288) could not be created due to vmalloc unable to allocate 75497472B, when the system's memory consumption (in MB) was the following: Total: 3942 Used: 837 (21.24%) Free: 138 Buffers: 239 Cached: 2727 Later analysis [1] by Michal Hocko showed that the vmalloc was not trying to reclaim memory from the page cache and was failing prematurely due to __GFP_NORETRY. Considering dcda9b0471 ("mm, tree wide: replace __GFP_REPEAT by __GFP_RETRY_MAYFAIL with more useful semantic") and [1], we can replace __GFP_NORETRY with __GFP_RETRY_MAYFAIL, as it won't invoke OOM killer and will try harder to fulfil allocation requests. Unfortunately, replacing the body of the BPF map memory allocation function with the kvmalloc_node helper function is not an option at this point in time, given 1) kmalloc is non-optional for higher order allocations, and 2) passing __GFP_RETRY_MAYFAIL to the kmalloc would stress the slab allocator too much for large requests. The change has been tested with the workloads mentioned above and by observing oom_kill value from /proc/vmstat. [1]: https://lore.kernel.org/bpf/20190310071318.GW5232@dhcp22.suse.cz/ Signed-off-by: Martynas Pumputis Acked-by: Yonghong Song Cc: Michal Hocko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20190318153940.GL8924@dhcp22.suse.cz/ --- kernel/bpf/syscall.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 62f6bced3a3c..afca36f53c49 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -136,21 +136,29 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) void *bpf_map_area_alloc(size_t size, int numa_node) { - /* We definitely need __GFP_NORETRY, so OOM killer doesn't - * trigger under memory pressure as we really just want to - * fail instead. + /* We really just want to fail instead of triggering OOM killer + * under memory pressure, therefore we set __GFP_NORETRY to kmalloc, + * which is used for lower order allocation requests. + * + * It has been observed that higher order allocation requests done by + * vmalloc with __GFP_NORETRY being set might fail due to not trying + * to reclaim memory from the page cache, thus we set + * __GFP_RETRY_MAYFAIL to avoid such situations. */ - const gfp_t flags = __GFP_NOWARN | __GFP_NORETRY | __GFP_ZERO; + + const gfp_t flags = __GFP_NOWARN | __GFP_ZERO; void *area; if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { - area = kmalloc_node(size, GFP_USER | flags, numa_node); + area = kmalloc_node(size, GFP_USER | __GFP_NORETRY | flags, + numa_node); if (area != NULL) return area; } - return __vmalloc_node_flags_caller(size, numa_node, GFP_KERNEL | flags, - __builtin_return_address(0)); + return __vmalloc_node_flags_caller(size, numa_node, + GFP_KERNEL | __GFP_RETRY_MAYFAIL | + flags, __builtin_return_address(0)); } void bpf_map_area_free(void *area) -- cgit From 6a1afffb08ce5f9fb9ccc20f7ab24846c0142984 Mon Sep 17 00:00:00 2001 From: Ondrej Mosnacek Date: Sun, 17 Mar 2019 14:46:53 +0100 Subject: selinux: fix NULL dereference in policydb_destroy() The conversion to kvmalloc() forgot to account for the possibility that p->type_attr_map_array might be null in policydb_destroy(). Fix this by destroying its contents only if it is not NULL. Also make sure ebitmap_init() is called on all entries before policydb_destroy() can be called. Right now this is a no-op, because both kvcalloc() and ebitmap_init() just zero out the whole struct, but let's rather not rely on a specific implementation. Reported-by: syzbot+a57b2aff60832666fc28@syzkaller.appspotmail.com Fixes: acdf52d97f82 ("selinux: convert to kvmalloc") Signed-off-by: Ondrej Mosnacek Acked-by: Stephen Smalley Signed-off-by: Paul Moore --- security/selinux/ss/policydb.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/security/selinux/ss/policydb.c b/security/selinux/ss/policydb.c index 6b576e588725..daecdfb15a9c 100644 --- a/security/selinux/ss/policydb.c +++ b/security/selinux/ss/policydb.c @@ -828,9 +828,11 @@ void policydb_destroy(struct policydb *p) hashtab_map(p->range_tr, range_tr_destroy, NULL); hashtab_destroy(p->range_tr); - for (i = 0; i < p->p_types.nprim; i++) - ebitmap_destroy(&p->type_attr_map_array[i]); - kvfree(p->type_attr_map_array); + if (p->type_attr_map_array) { + for (i = 0; i < p->p_types.nprim; i++) + ebitmap_destroy(&p->type_attr_map_array[i]); + kvfree(p->type_attr_map_array); + } ebitmap_destroy(&p->filename_trans_ttypes); ebitmap_destroy(&p->policycaps); @@ -2496,10 +2498,13 @@ int policydb_read(struct policydb *p, void *fp) if (!p->type_attr_map_array) goto bad; + /* just in case ebitmap_init() becomes more than just a memset(0): */ + for (i = 0; i < p->p_types.nprim; i++) + ebitmap_init(&p->type_attr_map_array[i]); + for (i = 0; i < p->p_types.nprim; i++) { struct ebitmap *e = &p->type_attr_map_array[i]; - ebitmap_init(e); if (p->policyvers >= POLICYDB_VERSION_AVTAB) { rc = ebitmap_read(e, fp); if (rc) -- cgit From fd6fab2cb78d3b6023c26ec53e0aa6f0b477d2f7 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 14 Mar 2019 16:30:06 -0600 Subject: io_uring: retry bulk slab allocs as single allocs I've seen cases where bulk alloc fails, since the bulk alloc API is all-or-nothing - either we get the number we ask for, or it returns 0 as number of entries. If we fail a batch bulk alloc, retry a "normal" kmem_cache_alloc() and just use that instead of failing with -EAGAIN. While in there, ensure we use GFP_KERNEL. That was an oversight in the original code, when we switched away from GFP_ATOMIC. Signed-off-by: Jens Axboe --- fs/io_uring.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 12bb238aed6b..4c6a5e60ddbe 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -399,13 +399,14 @@ static void io_ring_drop_ctx_refs(struct io_ring_ctx *ctx, unsigned refs) static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, struct io_submit_state *state) { + gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; struct io_kiocb *req; if (!percpu_ref_tryget(&ctx->refs)) return NULL; if (!state) { - req = kmem_cache_alloc(req_cachep, __GFP_NOWARN); + req = kmem_cache_alloc(req_cachep, gfp); if (unlikely(!req)) goto out; } else if (!state->free_reqs) { @@ -413,10 +414,18 @@ static struct io_kiocb *io_get_req(struct io_ring_ctx *ctx, int ret; sz = min_t(size_t, state->ios_left, ARRAY_SIZE(state->reqs)); - ret = kmem_cache_alloc_bulk(req_cachep, __GFP_NOWARN, sz, - state->reqs); - if (unlikely(ret <= 0)) - goto out; + ret = kmem_cache_alloc_bulk(req_cachep, gfp, sz, state->reqs); + + /* + * Bulk alloc is all-or-nothing. If we fail to get a batch, + * retry single alloc to be on the safe side. + */ + if (unlikely(ret <= 0)) { + state->reqs[0] = kmem_cache_alloc(req_cachep, gfp); + if (!state->reqs[0]) + goto out; + ret = 1; + } state->free_reqs = ret - 1; state->cur_req = 1; req = state->reqs[0]; -- cgit From bf33a7699e992b12d4c7d39dc3f0b61f6b26c5c2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 14 Mar 2019 15:22:18 -0600 Subject: io_uring: mark me as the maintainer And io_uring as maintained in general. Signed-off-by: Jens Axboe --- MAINTAINERS | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 17b59b66474b..a90137af48c2 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8091,6 +8091,16 @@ F: include/linux/iommu.h F: include/linux/of_iommu.h F: include/linux/iova.h +IO_URING +M: Jens Axboe +L: linux-block@vger.kernel.org +L: linux-fsdevel@vger.kernel.org +T: git git://git.kernel.dk/linux-block +T: git git://git.kernel.dk/liburing +S: Maintained +F: fs/io_uring.c +F: include/uapi/linux/io_uring.h + IP MASQUERADING M: Juanjo Ciarlante S: Maintained -- cgit From 875f1d0769cdcfe1596ff0ca609b453359e42ec9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 27 Feb 2019 13:05:25 -0700 Subject: iov_iter: add ITER_BVEC_FLAG_NO_REF flag For ITER_BVEC, if we're holding on to kernel pages, the caller doesn't need to grab a reference to the bvec pages, and drop that same reference on IO completion. This is essentially safe for any ITER_BVEC, but some use cases end up reusing pages and uncondtionally dropping a page reference on completion. And example of that is sendfile(2), that ends up being a splice_in + splice_out on the pipe pages. Add a flag that tells us it's fine to not grab a page reference to the bvec pages, since that caller knows not to drop a reference when it's done with the pages. Signed-off-by: Jens Axboe --- fs/io_uring.c | 3 +++ include/linux/uio.h | 24 +++++++++++++++++++----- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 4c6a5e60ddbe..c592a0933b0d 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -855,6 +855,9 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw, iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len); if (offset) iov_iter_advance(iter, offset); + + /* don't drop a reference to these pages */ + iter->type |= ITER_BVEC_FLAG_NO_REF; return 0; } diff --git a/include/linux/uio.h b/include/linux/uio.h index ecf584f6b82d..4e926641fa80 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -23,14 +23,23 @@ struct kvec { }; enum iter_type { - ITER_IOVEC = 0, - ITER_KVEC = 2, - ITER_BVEC = 4, - ITER_PIPE = 8, - ITER_DISCARD = 16, + /* set if ITER_BVEC doesn't hold a bv_page ref */ + ITER_BVEC_FLAG_NO_REF = 2, + + /* iter types */ + ITER_IOVEC = 4, + ITER_KVEC = 8, + ITER_BVEC = 16, + ITER_PIPE = 32, + ITER_DISCARD = 64, }; struct iov_iter { + /* + * Bit 0 is the read/write bit, set if we're writing. + * Bit 1 is the BVEC_FLAG_NO_REF bit, set if type is a bvec and + * the caller isn't expecting to drop a page reference when done. + */ unsigned int type; size_t iov_offset; size_t count; @@ -84,6 +93,11 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i) return i->type & (READ | WRITE); } +static inline bool iov_iter_bvec_no_ref(const struct iov_iter *i) +{ + return (i->type & ITER_BVEC_FLAG_NO_REF) != 0; +} + /* * Total number of bytes covered by an iovec. * -- cgit From 399254aaf4892113c806816f7e64cf40c804d46d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 27 Feb 2019 13:13:23 -0700 Subject: block: add BIO_NO_PAGE_REF flag If bio_iov_iter_get_pages() is called on an iov_iter that is flagged with NO_REF, then we don't need to add a page reference for the pages that we add. Add BIO_NO_PAGE_REF to track this in the bio, so IO completion knows not to drop a reference to these pages. Signed-off-by: Jens Axboe --- block/bio.c | 43 ++++++++++++++++++++++++------------------- fs/block_dev.c | 12 +++++++----- fs/iomap.c | 12 +++++++----- include/linux/blk_types.h | 1 + 4 files changed, 39 insertions(+), 29 deletions(-) diff --git a/block/bio.c b/block/bio.c index 71a78d9fb8b7..b64cedc7f87c 100644 --- a/block/bio.c +++ b/block/bio.c @@ -849,20 +849,14 @@ static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter) size = bio_add_page(bio, bv->bv_page, len, bv->bv_offset + iter->iov_offset); if (size == len) { - struct page *page; - int i; + if (!bio_flagged(bio, BIO_NO_PAGE_REF)) { + struct page *page; + int i; + + mp_bvec_for_each_page(page, bv, i) + get_page(page); + } - /* - * For the normal O_DIRECT case, we could skip grabbing this - * reference and then not have to put them again when IO - * completes. But this breaks some in-kernel users, like - * splicing to/from a loop device, where we release the pipe - * pages unconditionally. If we can fix that case, we can - * get rid of the get here and the need to call - * bio_release_pages() at IO completion time. - */ - mp_bvec_for_each_page(page, bv, i) - get_page(page); iov_iter_advance(iter, size); return 0; } @@ -925,10 +919,12 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) * This takes either an iterator pointing to user memory, or one pointing to * kernel pages (BVEC iterator). If we're adding user pages, we pin them and * map them into the kernel. On IO completion, the caller should put those - * pages. For now, when adding kernel pages, we still grab a reference to the - * page. This isn't strictly needed for the common case, but some call paths - * end up releasing pages from eg a pipe and we can't easily control these. - * See comment in __bio_iov_bvec_add_pages(). + * pages. If we're adding kernel pages, and the caller told us it's safe to + * do so, we just have to add the pages to the bio directly. We don't grab an + * extra reference to those pages (the user should already have that), and we + * don't put the page on IO completion. The caller needs to check if the bio is + * flagged BIO_NO_PAGE_REF on IO completion. If it isn't, then pages should be + * released. * * The function tries, but does not guarantee, to pin as many pages as * fit into the bio, or are requested in *iter, whatever is smaller. If @@ -940,6 +936,13 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) const bool is_bvec = iov_iter_is_bvec(iter); unsigned short orig_vcnt = bio->bi_vcnt; + /* + * If this is a BVEC iter, then the pages are kernel pages. Don't + * release them on IO completion, if the caller asked us to. + */ + if (is_bvec && iov_iter_bvec_no_ref(iter)) + bio_set_flag(bio, BIO_NO_PAGE_REF); + do { int ret; @@ -1696,7 +1699,8 @@ static void bio_dirty_fn(struct work_struct *work) next = bio->bi_private; bio_set_pages_dirty(bio); - bio_release_pages(bio); + if (!bio_flagged(bio, BIO_NO_PAGE_REF)) + bio_release_pages(bio); bio_put(bio); } } @@ -1713,7 +1717,8 @@ void bio_check_pages_dirty(struct bio *bio) goto defer; } - bio_release_pages(bio); + if (!bio_flagged(bio, BIO_NO_PAGE_REF)) + bio_release_pages(bio); bio_put(bio); return; defer: diff --git a/fs/block_dev.c b/fs/block_dev.c index e9faa52bb489..78d3257435c0 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -336,12 +336,14 @@ static void blkdev_bio_end_io(struct bio *bio) if (should_dirty) { bio_check_pages_dirty(bio); } else { - struct bio_vec *bvec; - int i; - struct bvec_iter_all iter_all; + if (!bio_flagged(bio, BIO_NO_PAGE_REF)) { + struct bvec_iter_all iter_all; + struct bio_vec *bvec; + int i; - bio_for_each_segment_all(bvec, bio, i, iter_all) - put_page(bvec->bv_page); + bio_for_each_segment_all(bvec, bio, i, iter_all) + put_page(bvec->bv_page); + } bio_put(bio); } } diff --git a/fs/iomap.c b/fs/iomap.c index 97cb9d486a7d..abdd18e404f8 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -1589,12 +1589,14 @@ static void iomap_dio_bio_end_io(struct bio *bio) if (should_dirty) { bio_check_pages_dirty(bio); } else { - struct bio_vec *bvec; - int i; - struct bvec_iter_all iter_all; + if (!bio_flagged(bio, BIO_NO_PAGE_REF)) { + struct bvec_iter_all iter_all; + struct bio_vec *bvec; + int i; - bio_for_each_segment_all(bvec, bio, i, iter_all) - put_page(bvec->bv_page); + bio_for_each_segment_all(bvec, bio, i, iter_all) + put_page(bvec->bv_page); + } bio_put(bio); } } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d66bf5f32610..791fee35df88 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -215,6 +215,7 @@ struct bio { /* * bio flags */ +#define BIO_NO_PAGE_REF 0 /* don't put release vec pages */ #define BIO_SEG_VALID 1 /* bi_phys_segments valid */ #define BIO_CLONED 2 /* doesn't own data */ #define BIO_BOUNCED 3 /* bio is a bounce bio */ -- cgit From 25208dd856e74f2b60d053eb98e6dd335816fbc1 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Mon, 18 Mar 2019 12:08:58 +0100 Subject: doc: fix link to MSG_ZEROCOPY patchset Use https and link to the patch directly. Signed-off-by: Tobias Klauser Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- Documentation/networking/msg_zerocopy.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Documentation/networking/msg_zerocopy.rst b/Documentation/networking/msg_zerocopy.rst index 18c1415e7bfa..ace56204dd03 100644 --- a/Documentation/networking/msg_zerocopy.rst +++ b/Documentation/networking/msg_zerocopy.rst @@ -50,7 +50,7 @@ the excellent reporting over at LWN.net or read the original code. patchset [PATCH net-next v4 0/9] socket sendmsg MSG_ZEROCOPY - http://lkml.kernel.org/r/20170803202945.70750-1-willemdebruijn.kernel@gmail.com + https://lkml.kernel.org/netdev/20170803202945.70750-1-willemdebruijn.kernel@gmail.com Interface -- cgit From 3028efe03be9c8c4cd7923f0f3c39b2871cc8a8f Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 18 Mar 2019 17:00:28 +0000 Subject: NFS: Fix nfs4_lock_state refcounting in nfs4_alloc_{lock,unlock}data() Commit 7b587e1a5a6c ("NFS: use locks_copy_lock() to copy locks.") changed the lock copying from memcpy() to the dedicated locks_copy_lock() function. The latter correctly increments the nfs4_lock_state.ls_count via nfs4_fl_copy_lock(), however, this refcount has already been incremented in the nfs4_alloc_{lock,unlock}data(). Kmemleak subsequently reports an unreferenced nfs4_lock_state object as below (arm64 platform): unreferenced object 0xffff8000fce0b000 (size 256): comm "systemd-sysuser", pid 1608, jiffies 4294892825 (age 32.348s) hex dump (first 32 bytes): 20 57 4c fb 00 80 ff ff 20 57 4c fb 00 80 ff ff WL..... WL..... 00 57 4c fb 00 80 ff ff 01 00 00 00 00 00 00 00 .WL............. backtrace: [<000000000d15010d>] kmem_cache_alloc+0x178/0x208 [<00000000d7c1d264>] nfs4_set_lock_state+0x124/0x1f0 [<000000009c867628>] nfs4_proc_lock+0x90/0x478 [<000000001686bd74>] do_setlk+0x64/0xe8 [<00000000e01500d4>] nfs_lock+0xe8/0x1f0 [<000000004f387d8d>] vfs_lock_file+0x18/0x40 [<00000000656ab79b>] do_lock_file_wait+0x68/0xf8 [<00000000f17c4a4b>] fcntl_setlk+0x224/0x280 [<0000000052a242c6>] do_fcntl+0x418/0x730 [<000000004f47291a>] __arm64_sys_fcntl+0x84/0xd0 [<00000000d6856e01>] el0_svc_common+0x80/0xf0 [<000000009c4bd1df>] el0_svc_handler+0x2c/0x80 [<00000000b1a0d479>] el0_svc+0x8/0xc [<0000000056c62a0f>] 0xffffffffffffffff This patch removes the original refcount_inc(&lsp->ls_count) that was paired with the memcpy() lock copying. Fixes: 7b587e1a5a6c ("NFS: use locks_copy_lock() to copy locks.") Cc: # 5.0.x- Cc: NeilBrown Signed-off-by: Catalin Marinas Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 4dbb0ee23432..6d2812a39287 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -6301,7 +6301,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, p->arg.seqid = seqid; p->res.seqid = seqid; p->lsp = lsp; - refcount_inc(&lsp->ls_count); /* Ensure we don't close file until we're done freeing locks! */ p->ctx = get_nfs_open_context(ctx); p->l_ctx = nfs_get_lock_context(ctx); @@ -6526,7 +6525,6 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->res.lock_seqid = p->arg.lock_seqid; p->lsp = lsp; p->server = server; - refcount_inc(&lsp->ls_count); p->ctx = get_nfs_open_context(ctx); locks_init_lock(&p->fl); locks_copy_lock(&p->fl, fl); -- cgit From 6f8f89ce1e18de1e391c9c1c14e7738881d1c00c Mon Sep 17 00:00:00 2001 From: Nick Crews Date: Mon, 11 Mar 2019 09:58:38 -0600 Subject: platform/chrome: Fix locking pattern in wilco_ec_mailbox() Before, ec->data_buffer could be written to from multiple contexts at the same time. Since the ec is shared data, it needs to be inside the mutex as well. Fixes: 7b3d4f44abf0 ("platform/chrome: Add new driver for Wilco EC") Signed-off-by: Nick Crews Signed-off-by: Enric Balletbo i Serra Signed-off-by: Benson Leung --- drivers/platform/chrome/wilco_ec/mailbox.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/platform/chrome/wilco_ec/mailbox.c b/drivers/platform/chrome/wilco_ec/mailbox.c index f6ff29a11f1a..14355668ddfa 100644 --- a/drivers/platform/chrome/wilco_ec/mailbox.c +++ b/drivers/platform/chrome/wilco_ec/mailbox.c @@ -223,11 +223,11 @@ int wilco_ec_mailbox(struct wilco_ec_device *ec, struct wilco_ec_message *msg) msg->command, msg->type, msg->flags, msg->response_size, msg->request_size); + mutex_lock(&ec->mailbox_lock); /* Prepare request packet */ rq = ec->data_buffer; wilco_ec_prepare(msg, rq); - mutex_lock(&ec->mailbox_lock); ret = wilco_ec_transfer(ec, msg, rq); mutex_unlock(&ec->mailbox_lock); -- cgit From e9abc611a941d4051cde1d94b2ab7473fdb50102 Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Wed, 20 Feb 2019 22:40:06 +0000 Subject: drm/rockchip: vop: reset scale mode when win is disabled NV12 framebuffers produced by the VPU shows distorted on RK3288 after win has been disabled when scaling is active. This issue can be reproduced using a 1080p modeset by: - Scale a 1280x720 NV12 framebuffer to 1920x1080 on win0 - Disable win0 - Display a 1920x1080 NV12 framebuffer without scaling on win0 - Output will now show the framebuffer distorted And by: - Scale a 1280x720 NV12 framebuffer to 1920x1080 - Change to a 720p modeset (win gets disabled and scaling reset to none) - Output will now show the framebuffer distorted Fix this by setting scale mode to none when win is disabled. Fixes: 4c156c21c794 ("drm/rockchip: vop: support plane scale") Cc: stable@vger.kernel.org Signed-off-by: Jonas Karlman Signed-off-by: Heiko Stuebner Link: https://patchwork.freedesktop.org/patch/msgid/AM3PR03MB0966DE3E19BACE07328CD637AC7D0@AM3PR03MB0966.eurprd03.prod.outlook.com --- drivers/gpu/drm/rockchip/rockchip_drm_vop.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c index c7d4c6073ea5..0d4ade9d4722 100644 --- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c +++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c @@ -541,6 +541,18 @@ static void vop_core_clks_disable(struct vop *vop) clk_disable(vop->hclk); } +static void vop_win_disable(struct vop *vop, const struct vop_win_data *win) +{ + if (win->phy->scl && win->phy->scl->ext) { + VOP_SCL_SET_EXT(vop, win, yrgb_hor_scl_mode, SCALE_NONE); + VOP_SCL_SET_EXT(vop, win, yrgb_ver_scl_mode, SCALE_NONE); + VOP_SCL_SET_EXT(vop, win, cbcr_hor_scl_mode, SCALE_NONE); + VOP_SCL_SET_EXT(vop, win, cbcr_ver_scl_mode, SCALE_NONE); + } + + VOP_WIN_SET(vop, win, enable, 0); +} + static int vop_enable(struct drm_crtc *crtc) { struct vop *vop = to_vop(crtc); @@ -586,7 +598,7 @@ static int vop_enable(struct drm_crtc *crtc) struct vop_win *vop_win = &vop->win[i]; const struct vop_win_data *win = vop_win->data; - VOP_WIN_SET(vop, win, enable, 0); + vop_win_disable(vop, win); } spin_unlock(&vop->reg_lock); @@ -735,7 +747,7 @@ static void vop_plane_atomic_disable(struct drm_plane *plane, spin_lock(&vop->reg_lock); - VOP_WIN_SET(vop, win, enable, 0); + vop_win_disable(vop, win); spin_unlock(&vop->reg_lock); } @@ -1622,7 +1634,7 @@ static int vop_initial(struct vop *vop) int channel = i * 2 + 1; VOP_WIN_SET(vop, win, channel, (channel + 1) << 4 | channel); - VOP_WIN_SET(vop, win, enable, 0); + vop_win_disable(vop, win); VOP_WIN_SET(vop, win, gate, 1); } -- cgit From 3897b6f0a859288c22fb793fad11ec2327e60fcd Mon Sep 17 00:00:00 2001 From: Andrea Righi Date: Thu, 14 Mar 2019 08:56:28 +0100 Subject: btrfs: raid56: properly unmap parity page in finish_parity_scrub() Parity page is incorrectly unmapped in finish_parity_scrub(), triggering a reference counter bug on i386, i.e.: [ 157.662401] kernel BUG at mm/highmem.c:349! [ 157.666725] invalid opcode: 0000 [#1] SMP PTI The reason is that kunmap(p_page) was completely left out, so we never did an unmap for the p_page and the loop unmapping the rbio page was iterating over the wrong number of stripes: unmapping should be done with nr_data instead of rbio->real_stripes. Test case to reproduce the bug: - create a raid5 btrfs filesystem: # mkfs.btrfs -m raid5 -d raid5 /dev/sdb /dev/sdc /dev/sdd /dev/sde - mount it: # mount /dev/sdb /mnt - run btrfs scrub in a loop: # while :; do btrfs scrub start -BR /mnt; done BugLink: https://bugs.launchpad.net/bugs/1812845 Fixes: 5a6ac9eacb49 ("Btrfs, raid56: support parity scrub on raid56") CC: stable@vger.kernel.org # 4.4+ Reviewed-by: Johannes Thumshirn Signed-off-by: Andrea Righi Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/raid56.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index e74455eb42f9..6976e2280771 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -2429,8 +2429,9 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, bitmap_clear(rbio->dbitmap, pagenr, 1); kunmap(p); - for (stripe = 0; stripe < rbio->real_stripes; stripe++) + for (stripe = 0; stripe < nr_data; stripe++) kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); + kunmap(p_page); } __free_page(p_page); -- cgit From 16d80c54ad42c573a897ae7bcf5a9816be54e6fe Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 15 Mar 2019 14:50:04 +0100 Subject: rbd: set io_min, io_opt and discard_granularity to alloc_size Now that we have alloc_size that controls our discard behavior, it doesn't make sense to have these set to object (set) size. alloc_size defaults to 64k, but because discard_granularity is likely 4M, only ranges that are equal to or bigger than 4M can be considered during fstrim. A smaller io_min is also more likely to be met, resulting in fewer deferred writes on bluestore OSDs. Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- drivers/block/rbd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 4ba967d65cf9..3b2c9289dccb 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -833,7 +833,7 @@ static int parse_rbd_opts_token(char *c, void *private) pctx->opts->queue_depth = intval; break; case Opt_alloc_size: - if (intval < 1) { + if (intval < SECTOR_SIZE) { pr_err("alloc_size out of range\n"); return -EINVAL; } @@ -4203,12 +4203,12 @@ static int rbd_init_disk(struct rbd_device *rbd_dev) q->limits.max_sectors = queue_max_hw_sectors(q); blk_queue_max_segments(q, USHRT_MAX); blk_queue_max_segment_size(q, UINT_MAX); - blk_queue_io_min(q, objset_bytes); - blk_queue_io_opt(q, objset_bytes); + blk_queue_io_min(q, rbd_dev->opts->alloc_size); + blk_queue_io_opt(q, rbd_dev->opts->alloc_size); if (rbd_dev->opts->trim) { blk_queue_flag_set(QUEUE_FLAG_DISCARD, q); - q->limits.discard_granularity = objset_bytes; + q->limits.discard_granularity = rbd_dev->opts->alloc_size; blk_queue_max_discard_sectors(q, objset_bytes >> SECTOR_SHIFT); blk_queue_max_write_zeroes_sectors(q, objset_bytes >> SECTOR_SHIFT); } -- cgit From 165aa2bfb42904b1bec4bf2fa257c8c603c14a06 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 28 Jan 2019 15:24:42 +0100 Subject: scsi: iscsi: flush running unbind operations when removing a session In some cases, the iscsi_remove_session() function is called while an unbind_work operation is still running. This may cause a situation where sysfs objects are removed in an incorrect order, triggering a kernel warning. [ 605.249442] ------------[ cut here ]------------ [ 605.259180] sysfs group 'power' not found for kobject 'target2:0:0' [ 605.321371] WARNING: CPU: 1 PID: 26794 at fs/sysfs/group.c:235 sysfs_remove_group+0x76/0x80 [ 605.341266] Modules linked in: dm_service_time target_core_user target_core_pscsi target_core_file target_core_iblock iscsi_target_mod target_core_mod nls_utf8 isofs ppdev bochs_drm nfit ttm libnvdimm drm_kms_helper syscopyarea sysfillrect sysimgblt joydev pcspkr fb_sys_fops drm i2c_piix4 sg parport_pc parport xfs libcrc32c dm_multipath sr_mod sd_mod cdrom ata_generic 8021q garp mrp ata_piix stp crct10dif_pclmul crc32_pclmul llc libata crc32c_intel virtio_net net_failover ghash_clmulni_intel serio_raw failover sunrpc dm_mirror dm_region_hash dm_log dm_mod be2iscsi bnx2i cnic uio cxgb4i cxgb4 libcxgbi libcxgb qla4xxx iscsi_boot_sysfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi [ 605.627479] CPU: 1 PID: 26794 Comm: kworker/u32:2 Not tainted 4.18.0-60.el8.x86_64 #1 [ 605.721401] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20180724_192412-buildhw-07.phx2.fedoraproject.org-1.fc29 04/01/2014 [ 605.823651] Workqueue: scsi_wq_2 __iscsi_unbind_session [scsi_transport_iscsi] [ 605.830940] RIP: 0010:sysfs_remove_group+0x76/0x80 [ 605.922907] Code: 48 89 df 5b 5d 41 5c e9 38 c4 ff ff 48 89 df e8 e0 bf ff ff eb cb 49 8b 14 24 48 8b 75 00 48 c7 c7 38 73 cb a7 e8 24 77 d7 ff <0f> 0b 5b 5d 41 5c c3 0f 1f 00 0f 1f 44 00 00 41 56 41 55 41 54 55 [ 606.122304] RSP: 0018:ffffbadcc8d1bda8 EFLAGS: 00010286 [ 606.218492] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 606.326381] RDX: ffff98bdfe85eb40 RSI: ffff98bdfe856818 RDI: ffff98bdfe856818 [ 606.514498] RBP: ffffffffa7ab73e0 R08: 0000000000000268 R09: 0000000000000007 [ 606.529469] R10: 0000000000000000 R11: ffffffffa860d9ad R12: ffff98bdf978e838 [ 606.630535] R13: ffff98bdc2cd4010 R14: ffff98bdc2cd3ff0 R15: ffff98bdc2cd4000 [ 606.824707] FS: 0000000000000000(0000) GS:ffff98bdfe840000(0000) knlGS:0000000000000000 [ 607.018333] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 607.117844] CR2: 00007f84b78ac024 CR3: 000000002c00a003 CR4: 00000000003606e0 [ 607.117844] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 607.420926] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 607.524236] Call Trace: [ 607.530591] device_del+0x56/0x350 [ 607.624393] ? ata_tlink_match+0x30/0x30 [libata] [ 607.727805] ? attribute_container_device_trigger+0xb4/0xf0 [ 607.829911] scsi_target_reap_ref_release+0x39/0x50 [ 607.928572] scsi_remove_target+0x1a2/0x1d0 [ 608.017350] __iscsi_unbind_session+0xb3/0x160 [scsi_transport_iscsi] [ 608.117435] process_one_work+0x1a7/0x360 [ 608.132917] worker_thread+0x30/0x390 [ 608.222900] ? pwq_unbound_release_workfn+0xd0/0xd0 [ 608.323989] kthread+0x112/0x130 [ 608.418318] ? kthread_bind+0x30/0x30 [ 608.513821] ret_from_fork+0x35/0x40 [ 608.613909] ---[ end trace 0b98c310c8a6138c ]--- Signed-off-by: Maurizio Lombardi Acked-by: Chris Leech Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_transport_iscsi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 0508831d6fb9..0a82e93566dc 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2200,6 +2200,8 @@ void iscsi_remove_session(struct iscsi_cls_session *session) scsi_target_unblock(&session->dev, SDEV_TRANSPORT_OFFLINE); /* flush running scans then delete devices */ flush_work(&session->scan_work); + /* flush running unbind operations */ + flush_work(&session->unbind_work); __iscsi_unbind_session(&session->unbind_work); /* hw iscsi may not have removed all connections from session */ -- cgit From 6e0473633af059a559ce7b4cbaa51e389c94085e Mon Sep 17 00:00:00 2001 From: Thomas Preston Date: Wed, 6 Mar 2019 20:06:18 +0000 Subject: drm/i915/bios: assume eDP is present on port A when there is no VBT We rely on VBT DDI port info for eDP detection on GEN9 platforms and above. This breaks GEN9 platforms which don't have VBT because port A eDP now defaults to false. Fix this by defaulting to true when VBT is missing. Fixes: a98d9c1d7e9b ("drm/i915/ddi: Rely on VBT DDI port info for eDP detection") Signed-off-by: Thomas Preston Signed-off-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190306200618.17405-1-thomas.preston@codethink.co.uk (cherry picked from commit 2131bc0ced6088648e47f126566c3da58b07e4ef) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/intel_bios.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c index b508d8a735e0..4364f42cac6b 100644 --- a/drivers/gpu/drm/i915/intel_bios.c +++ b/drivers/gpu/drm/i915/intel_bios.c @@ -1673,6 +1673,7 @@ init_vbt_missing_defaults(struct drm_i915_private *dev_priv) info->supports_dvi = (port != PORT_A && port != PORT_E); info->supports_hdmi = info->supports_dvi; info->supports_dp = (port != PORT_E); + info->supports_edp = (port == PORT_A); } } -- cgit From 65f26e978d7c55c3c3d04296058d95cf7b6e3f14 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 15 Mar 2019 16:39:33 +0000 Subject: drm/i915: Fix off-by-one in reporting hanging process ffs() is 1-indexed, but we want to use it as an index into an array, so use __ffs() instead. Fixes: eb8d0f5af4ec ("drm/i915: Remove GPU reset dependence on struct_mutex") Signed-off-by: Chris Wilson Cc: Mika Kuoppala Reviewed-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20190315163933.19352-1-chris@chris-wilson.co.uk (cherry picked from commit 9073e5b26743b8b675cc44a9c0c8f8c3d584e1c0) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_gpu_error.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 9a65341fec09..aa6791255252 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1721,7 +1721,7 @@ error_msg(struct i915_gpu_state *error, unsigned long engines, const char *msg) i915_error_generate_code(error, engines)); if (engines) { /* Just show the first executing process, more is confusing */ - i = ffs(engines); + i = __ffs(engines); len += scnprintf(error->error_msg + len, sizeof(error->error_msg) - len, ", in %s [%d]", -- cgit From 000c4f90e3f0194eef218ff2c6a8fd8ca1de4313 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 14 Mar 2019 07:58:29 +0000 Subject: drm/i915: Sanity check mmap length against object size We assumed that vm_mmap() would reject an attempt to mmap past the end of the filp (our object), but we were wrong. Applications that tried to use the mmap beyond the end of the object would be greeted by a SIGBUS. After this patch, those applications will be told about the error on creating the mmap, rather than at a random moment on later access. Reported-by: Antonio Argenziano Testcase: igt/gem_mmap/bad-size Signed-off-by: Chris Wilson Cc: Antonio Argenziano Cc: Joonas Lahtinen Cc: Tvrtko Ursulin Cc: stable@vger.kernel.org Reviewed-by: Tvrtko Ursulin Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20190314075829.16838-1-chris@chris-wilson.co.uk (cherry picked from commit 794a11cb67201ad1bb61af510bb8460280feb3f3) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_gem.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 30d516e975c6..8558e81fdc2a 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1734,8 +1734,13 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, * pages from. */ if (!obj->base.filp) { - i915_gem_object_put(obj); - return -ENXIO; + addr = -ENXIO; + goto err; + } + + if (range_overflows(args->offset, args->size, (u64)obj->base.size)) { + addr = -EINVAL; + goto err; } addr = vm_mmap(obj->base.filp, 0, args->size, @@ -1749,8 +1754,8 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, struct vm_area_struct *vma; if (down_write_killable(&mm->mmap_sem)) { - i915_gem_object_put(obj); - return -EINTR; + addr = -EINTR; + goto err; } vma = find_vma(mm, addr); if (vma && __vma_matches(vma, obj->base.filp, addr, args->size)) @@ -1768,12 +1773,10 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data, i915_gem_object_put(obj); args->addr_ptr = (u64)addr; - return 0; err: i915_gem_object_put(obj); - return addr; } -- cgit From e5dcc0c3223c45c94100f05f28d8ef814db3d82c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 15 Mar 2019 10:41:14 -0700 Subject: net: rose: fix a possible stack overflow rose_write_internal() uses a temp buffer of 100 bytes, but a manual inspection showed that given arbitrary input, rose_create_facilities() can fill up to 110 bytes. Lets use a tailroom of 256 bytes for peace of mind, and remove the bounce buffer : we can simply allocate a big enough skb and adjust its length as needed. syzbot report : BUG: KASAN: stack-out-of-bounds in memcpy include/linux/string.h:352 [inline] BUG: KASAN: stack-out-of-bounds in rose_create_facilities net/rose/rose_subr.c:521 [inline] BUG: KASAN: stack-out-of-bounds in rose_write_internal+0x597/0x15d0 net/rose/rose_subr.c:116 Write of size 7 at addr ffff88808b1ffbef by task syz-executor.0/24854 CPU: 0 PID: 24854 Comm: syz-executor.0 Not tainted 5.0.0+ #97 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x172/0x1f0 lib/dump_stack.c:113 print_address_description.cold+0x7c/0x20d mm/kasan/report.c:187 kasan_report.cold+0x1b/0x40 mm/kasan/report.c:317 check_memory_region_inline mm/kasan/generic.c:185 [inline] check_memory_region+0x123/0x190 mm/kasan/generic.c:191 memcpy+0x38/0x50 mm/kasan/common.c:131 memcpy include/linux/string.h:352 [inline] rose_create_facilities net/rose/rose_subr.c:521 [inline] rose_write_internal+0x597/0x15d0 net/rose/rose_subr.c:116 rose_connect+0x7cb/0x1510 net/rose/af_rose.c:826 __sys_connect+0x266/0x330 net/socket.c:1685 __do_sys_connect net/socket.c:1696 [inline] __se_sys_connect net/socket.c:1693 [inline] __x64_sys_connect+0x73/0xb0 net/socket.c:1693 do_syscall_64+0x103/0x610 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x458079 Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f47b8d9dc78 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000458079 RDX: 000000000000001c RSI: 0000000020000040 RDI: 0000000000000004 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f47b8d9e6d4 R13: 00000000004be4a4 R14: 00000000004ceca8 R15: 00000000ffffffff The buggy address belongs to the page: page:ffffea00022c7fc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0 flags: 0x1fffc0000000000() raw: 01fffc0000000000 0000000000000000 ffffffff022c0101 0000000000000000 raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff88808b1ffa80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff88808b1ffb00: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00 00 03 >ffff88808b1ffb80: f2 f2 00 00 00 00 00 00 00 00 00 00 00 00 04 f3 ^ ffff88808b1ffc00: f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 ffff88808b1ffc80: 00 00 00 00 00 00 00 f1 f1 f1 f1 f1 f1 01 f2 01 Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/rose/rose_subr.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/net/rose/rose_subr.c b/net/rose/rose_subr.c index 7ca57741b2fb..7849f286bb93 100644 --- a/net/rose/rose_subr.c +++ b/net/rose/rose_subr.c @@ -105,16 +105,17 @@ void rose_write_internal(struct sock *sk, int frametype) struct sk_buff *skb; unsigned char *dptr; unsigned char lci1, lci2; - char buffer[100]; - int len, faclen = 0; + int maxfaclen = 0; + int len, faclen; + int reserve; - len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN + 1; + reserve = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + 1; + len = ROSE_MIN_LEN; switch (frametype) { case ROSE_CALL_REQUEST: len += 1 + ROSE_ADDR_LEN + ROSE_ADDR_LEN; - faclen = rose_create_facilities(buffer, rose); - len += faclen; + maxfaclen = 256; break; case ROSE_CALL_ACCEPTED: case ROSE_CLEAR_REQUEST: @@ -123,15 +124,16 @@ void rose_write_internal(struct sock *sk, int frametype) break; } - if ((skb = alloc_skb(len, GFP_ATOMIC)) == NULL) + skb = alloc_skb(reserve + len + maxfaclen, GFP_ATOMIC); + if (!skb) return; /* * Space for AX.25 header and PID. */ - skb_reserve(skb, AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + 1); + skb_reserve(skb, reserve); - dptr = skb_put(skb, skb_tailroom(skb)); + dptr = skb_put(skb, len); lci1 = (rose->lci >> 8) & 0x0F; lci2 = (rose->lci >> 0) & 0xFF; @@ -146,7 +148,8 @@ void rose_write_internal(struct sock *sk, int frametype) dptr += ROSE_ADDR_LEN; memcpy(dptr, &rose->source_addr, ROSE_ADDR_LEN); dptr += ROSE_ADDR_LEN; - memcpy(dptr, buffer, faclen); + faclen = rose_create_facilities(dptr, rose); + skb_put(skb, faclen); dptr += faclen; break; -- cgit From c22da36688d6298f2e546dcc43fdc1ad35036467 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Sat, 16 Mar 2019 01:00:50 +0100 Subject: gtp: change NET_UDP_TUNNEL dependency to select Similarly to commit a7603ac1fc8c ("geneve: change NET_UDP_TUNNEL dependency to select"), GTP has a dependency on NET_UDP_TUNNEL which makes impossible to compile it if no other protocol depending on NET_UDP_TUNNEL is selected. Fix this by changing the depends to a select, and drop NET_IP_TUNNEL from the select list, as it already depends on NET_UDP_TUNNEL. Signed-off-by: Matteo Croce Signed-off-by: David S. Miller --- drivers/net/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 5e4ca082cfcd..7a96d168efc4 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -216,8 +216,8 @@ config GENEVE config GTP tristate "GPRS Tunneling Protocol datapath (GTP-U)" - depends on INET && NET_UDP_TUNNEL - select NET_IP_TUNNEL + depends on INET + select NET_UDP_TUNNEL ---help--- This allows one to create gtp virtual interfaces that provide the GPRS Tunneling Protocol datapath (GTP-U). This tunneling protocol -- cgit From bb9e5c5bcd76f4474eac3baf643d7a39f7bac7bb Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Sat, 16 Mar 2019 14:21:19 +1100 Subject: mac8390: Fix mmio access size probe The bug that Stan reported is as follows. After a restart, a 16-bit NIC may be incorrectly identified as a 32-bit NIC and stop working. mac8390 slot.E: Memory length resource not found, probing mac8390 slot.E: Farallon EtherMac II-C (type farallon) mac8390 slot.E: MAC 00:00:c5:30:c2:99, IRQ 61, 32 KB shared memory at 0xfeed0000, 32-bit access. The bug never arises after a cold start and only intermittently after a warm start. (I didn't investigate why the bug is intermittent.) It turns out that memcpy_toio() is deprecated and memcmp_withio() also has issues. Replacing these calls with mmio accessors fixes the problem. Reported-and-tested-by: Stan Johnson Fixes: 2964db0f5904 ("m68k: Mac DP8390 update") Signed-off-by: Finn Thain Signed-off-by: David S. Miller --- drivers/net/ethernet/8390/mac8390.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/8390/mac8390.c b/drivers/net/ethernet/8390/mac8390.c index 342ae08ec3c2..d60a86aa8aa8 100644 --- a/drivers/net/ethernet/8390/mac8390.c +++ b/drivers/net/ethernet/8390/mac8390.c @@ -153,8 +153,6 @@ static void dayna_block_input(struct net_device *dev, int count, static void dayna_block_output(struct net_device *dev, int count, const unsigned char *buf, int start_page); -#define memcmp_withio(a, b, c) memcmp((a), (void *)(b), (c)) - /* Slow Sane (16-bit chunk memory read/write) Cabletron uses this */ static void slow_sane_get_8390_hdr(struct net_device *dev, struct e8390_pkt_hdr *hdr, int ring_page); @@ -233,19 +231,26 @@ static enum mac8390_type mac8390_ident(struct nubus_rsrc *fres) static enum mac8390_access mac8390_testio(unsigned long membase) { - unsigned long outdata = 0xA5A0B5B0; - unsigned long indata = 0x00000000; + u32 outdata = 0xA5A0B5B0; + u32 indata = 0; + /* Try writing 32 bits */ - memcpy_toio((void __iomem *)membase, &outdata, 4); - /* Now compare them */ - if (memcmp_withio(&outdata, membase, 4) == 0) + nubus_writel(outdata, membase); + /* Now read it back */ + indata = nubus_readl(membase); + if (outdata == indata) return ACCESS_32; + + outdata = 0xC5C0D5D0; + indata = 0; + /* Write 16 bit output */ word_memcpy_tocard(membase, &outdata, 4); /* Now read it back */ word_memcpy_fromcard(&indata, membase, 4); if (outdata == indata) return ACCESS_16; + return ACCESS_UNKNOWN; } -- cgit From a7faaa0c5dc7d091cc9f72b870d7edcdd6f43f12 Mon Sep 17 00:00:00 2001 From: Dmitry Bogdanov Date: Sat, 16 Mar 2019 08:28:18 +0000 Subject: net: aquantia: fix rx checksum offload for UDP/TCP over IPv6 TCP/UDP checksum validity was propagated to skb only if IP checksum is valid. But for IPv6 there is no validity as there is no checksum in IPv6. This patch propagates TCP/UDP checksum validity regardless of IP checksum. Fixes: 018423e90bee ("net: ethernet: aquantia: Add ring support code") Signed-off-by: Igor Russkikh Signed-off-by: Nikita Danilov Signed-off-by: Dmitry Bogdanov Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c index 74550ccc7a20..e2ffb159cbe2 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c @@ -186,11 +186,12 @@ static void aq_rx_checksum(struct aq_ring_s *self, } if (buff->is_ip_cso) { __skb_incr_checksum_unnecessary(skb); - if (buff->is_udp_cso || buff->is_tcp_cso) - __skb_incr_checksum_unnecessary(skb); } else { skb->ip_summed = CHECKSUM_NONE; } + + if (buff->is_udp_cso || buff->is_tcp_cso) + __skb_incr_checksum_unnecessary(skb); } #define AQ_SKB_ALIGN SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) -- cgit From cc4807bb609230d8959fd732b0bf3bd4c2de8eac Mon Sep 17 00:00:00 2001 From: Zhiqiang Liu Date: Sat, 16 Mar 2019 17:02:54 +0800 Subject: vxlan: Don't call gro_cells_destroy() before device is unregistered Commit ad6c9986bcb62 ("vxlan: Fix GRO cells race condition between receive and link delete") fixed a race condition for the typical case a vxlan device is dismantled from the current netns. But if a netns is dismantled, vxlan_destroy_tunnels() is called to schedule a unregister_netdevice_queue() of all the vxlan tunnels that are related to this netns. In vxlan_destroy_tunnels(), gro_cells_destroy() is called and finished before unregister_netdevice_queue(). This means that the gro_cells_destroy() call is done too soon, for the same reasons explained in above commit. So we need to fully respect the RCU rules, and thus must remove the gro_cells_destroy() call or risk use after-free. Fixes: 58ce31cca1ff ("vxlan: GRO support at tunnel layer") Signed-off-by: Suanming.Mou Suggested-by: Eric Dumazet Reviewed-by: Stefano Brivio Reviewed-by: Zhiqiang Liu Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 077f1b9f2761..d76dfed8d9bb 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -4335,10 +4335,8 @@ static void vxlan_destroy_tunnels(struct net *net, struct list_head *head) /* If vxlan->dev is in the same netns, it has already been added * to the list by the previous loop. */ - if (!net_eq(dev_net(vxlan->dev), net)) { - gro_cells_destroy(&vxlan->gro_cells); + if (!net_eq(dev_net(vxlan->dev), net)) unregister_netdevice_queue(vxlan->dev, head); - } } for (h = 0; h < PORT_HASH_SIZE; ++h) -- cgit From a4dc6a49156b1f8d6e17251ffda17c9e6a5db78a Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Sat, 16 Mar 2019 14:41:30 +0100 Subject: packets: Always register packet sk in the same order When using fanouts with AF_PACKET, the demux functions such as fanout_demux_cpu will return an index in the fanout socket array, which corresponds to the selected socket. The ordering of this array depends on the order the sockets were added to a given fanout group, so for FANOUT_CPU this means sockets are bound to cpus in the order they are configured, which is OK. However, when stopping then restarting the interface these sockets are bound to, the sockets are reassigned to the fanout group in the reverse order, due to the fact that they were inserted at the head of the interface's AF_PACKET socket list. This means that traffic that was directed to the first socket in the fanout group is now directed to the last one after an interface restart. In the case of FANOUT_CPU, traffic from CPU0 will be directed to the socket that used to receive traffic from the last CPU after an interface restart. This commit introduces a helper to add a socket at the tail of a list, then uses it to register AF_PACKET sockets. Note that this changes the order in which sockets are listed in /proc and with sock_diag. Fixes: dc99f600698d ("packet: Add fanout support") Signed-off-by: Maxime Chevallier Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/net/sock.h | 6 ++++++ net/packet/af_packet.c | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/include/net/sock.h b/include/net/sock.h index 328cb7cb7b0b..8de5ee258b93 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -710,6 +710,12 @@ static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) hlist_add_head_rcu(&sk->sk_node, list); } +static inline void sk_add_node_tail_rcu(struct sock *sk, struct hlist_head *list) +{ + sock_hold(sk); + hlist_add_tail_rcu(&sk->sk_node, list); +} + static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8376bc1c1508..8754d7c93b84 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3243,7 +3243,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, } mutex_lock(&net->packet.sklist_lock); - sk_add_node_rcu(sk, &net->packet.sklist); + sk_add_node_tail_rcu(sk, &net->packet.sklist); mutex_unlock(&net->packet.sklist_lock); preempt_disable(); -- cgit From 18bed89107a400af0d672ec85a270f1545db2569 Mon Sep 17 00:00:00 2001 From: Yoshiki Komachi Date: Mon, 18 Mar 2019 14:39:52 +0900 Subject: af_packet: fix the tx skb protocol in raw sockets with ETH_P_ALL I am using "protocol ip" filters in TC to manipulate TC flower classifiers, which are only available with "protocol ip". However, I faced an issue that packets sent via raw sockets with ETH_P_ALL did not match the ip filters even if they did satisfy the condition (e.g., DHCP offer from dhcpd). I have determined that the behavior was caused by an unexpected value stored in skb->protocol, namely, ETH_P_ALL instead of ETH_P_IP, when packets were sent via raw sockets with ETH_P_ALL set. IMHO, storing ETH_P_ALL in skb->protocol is not appropriate for packets sent via raw sockets because ETH_P_ALL is not a real ether type used on wire, but a virtual one. This patch fixes the tx protocol selection in cases of transmission via raw sockets created with ETH_P_ALL so that it asks the driver to extract protocol from the Ethernet header. Fixes: 75c65772c3 ("net/packet: Ask driver for protocol if not provided by user") Signed-off-by: Yoshiki Komachi Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/packet/af_packet.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8754d7c93b84..323655a25674 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1852,7 +1852,8 @@ oom: static void packet_parse_headers(struct sk_buff *skb, struct socket *sock) { - if (!skb->protocol && sock->type == SOCK_RAW) { + if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) && + sock->type == SOCK_RAW) { skb_reset_mac_header(skb); skb->protocol = dev_parse_header_protocol(skb); } -- cgit From 273160ffc6b993c7c91627f5a84799c66dfe4dee Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 19:47:00 +0800 Subject: sctp: get sctphdr by offset in sctp_compute_cksum sctp_hdr(skb) only works when skb->transport_header is set properly. But in Netfilter, skb->transport_header for ipv6 is not guaranteed to be right value for sctphdr. It would cause to fail to check the checksum for sctp packets. So fix it by using offset, which is always right in all places. v1->v2: - Fix the changelog. Fixes: e6d8b64b34aa ("net: sctp: fix and consolidate SCTP checksumming code") Reported-by: Li Shuang Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h index 32ee65a30aff..1c6e6c0766ca 100644 --- a/include/net/sctp/checksum.h +++ b/include/net/sctp/checksum.h @@ -61,7 +61,7 @@ static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2, static inline __le32 sctp_compute_cksum(const struct sk_buff *skb, unsigned int offset) { - struct sctphdr *sh = sctp_hdr(skb); + struct sctphdr *sh = (struct sctphdr *)(skb->data + offset); const struct skb_checksum_ops ops = { .update = sctp_csum_update, .combine = sctp_csum_combine, -- cgit From 636d25d557d1073281013c43e4ff4737692da2d4 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 19:58:29 +0800 Subject: sctp: not copy sctp_sock pd_lobby in sctp_copy_descendant Now sctp_copy_descendant() copies pd_lobby from old sctp scok to new sctp sock. If sctp_sock_migrate() returns error, it will panic when releasing new sock and trying to purge pd_lobby due to the incorrect pointers in pd_lobby. [ 120.485116] kasan: CONFIG_KASAN_INLINE enabled [ 120.486270] kasan: GPF could be caused by NULL-ptr deref or user [ 120.509901] Call Trace: [ 120.510443] sctp_ulpevent_free+0x1e8/0x490 [sctp] [ 120.511438] sctp_queue_purge_ulpevents+0x97/0xe0 [sctp] [ 120.512535] sctp_close+0x13a/0x700 [sctp] [ 120.517483] inet_release+0xdc/0x1c0 [ 120.518215] __sock_release+0x1d2/0x2a0 [ 120.519025] sctp_do_peeloff+0x30f/0x3c0 [sctp] We fix it by not copying sctp_sock pd_lobby in sctp_copy_descendan(), and skb_queue_head_init() can also be removed in sctp_sock_migrate(). Reported-by: syzbot+85e0b422ff140b03672a@syzkaller.appspotmail.com Fixes: 89664c623617 ("sctp: sctp_sock_migrate() returns error if sctp_bind_addr_dup() fails") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6140471efd4b..65b538604c5b 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -9169,7 +9169,7 @@ static inline void sctp_copy_descendant(struct sock *sk_to, { int ancestor_size = sizeof(struct inet_sock) + sizeof(struct sctp_sock) - - offsetof(struct sctp_sock, auto_asconf_list); + offsetof(struct sctp_sock, pd_lobby); if (sk_from->sk_family == PF_INET6) ancestor_size += sizeof(struct ipv6_pinfo); @@ -9253,7 +9253,6 @@ static int sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, * 2) Peeling off partial delivery; keep pd_lobby in new pd_lobby. * 3) Peeling off non-partial delivery; move pd_lobby to receive_queue. */ - skb_queue_head_init(&newsp->pd_lobby); atomic_set(&sctp_sk(newsk)->pd_mode, assoc->ulpq.pd_mode); if (atomic_read(&sctp_sk(oldsk)->pd_mode)) { -- cgit From 1354e72fabf4d8763817564648984351755f0ccb Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Mon, 18 Mar 2019 20:05:59 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt Currently if the user pass an invalid asoc_id to SCTP_DEFAULT_SEND_PARAM on a TCP-style socket, it will silently ignore the new parameters. That's because after not finding an asoc, it is checking asoc_id against the known values of CURRENT/FUTURE/ALL values and that fails to match. IOW, if the user supplies an invalid asoc id or not, it should either match the current asoc or the socket itself so that it will inherit these later. Fixes it by forcing asoc_id to SCTP_FUTURE_ASSOC in case it is a TCP-style socket without an asoc, so that the values get set on the socket. Fixes: 707e45b3dc5a ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_DEFAULT_SEND_PARAM sockopt") Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 65b538604c5b..d0e5c627a266 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3024,6 +3024,9 @@ static int sctp_setsockopt_default_send_param(struct sock *sk, return 0; } + if (sctp_style(sk, TCP)) + info.sinfo_assoc_id = SCTP_FUTURE_ASSOC; + if (info.sinfo_assoc_id == SCTP_FUTURE_ASSOC || info.sinfo_assoc_id == SCTP_ALL_ASSOC) { sp->default_stream = info.sinfo_stream; -- cgit From 8e2614fc1c2a525d3244df65da486fc914a2bf78 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:00 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DELAYED_SACK sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_DELAYED_SACK sockopt. Fixes: 9c5829e1c49e ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_DELAYED_SACK sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d0e5c627a266..4c5821befaf3 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2920,6 +2920,9 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk, return 0; } + if (sctp_style(sk, TCP)) + params.sack_assoc_id = SCTP_FUTURE_ASSOC; + if (params.sack_assoc_id == SCTP_FUTURE_ASSOC || params.sack_assoc_id == SCTP_ALL_ASSOC) { if (params.sack_delay) { -- cgit From a842e65b25a418363bf8196e2343123a984ee69b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:01 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SNDINFO sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_DEFAULT_SNDINFO sockopt. Fixes: 92fc3bd928c9 ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_DEFAULT_SNDINFO sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 4c5821befaf3..3bac03931b67 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3087,6 +3087,9 @@ static int sctp_setsockopt_default_sndinfo(struct sock *sk, return 0; } + if (sctp_style(sk, TCP)) + info.snd_assoc_id = SCTP_FUTURE_ASSOC; + if (info.snd_assoc_id == SCTP_FUTURE_ASSOC || info.snd_assoc_id == SCTP_ALL_ASSOC) { sp->default_stream = info.snd_sid; -- cgit From cface2cb585e392995cc11a4a814b433e6099ec7 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:02 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_CONTEXT sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_CONTEXT sockopt. Fixes: 49b037acca8c ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_CONTEXT sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3bac03931b67..b024a625f78a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3540,6 +3540,9 @@ static int sctp_setsockopt_context(struct sock *sk, char __user *optval, return 0; } + if (sctp_style(sk, TCP)) + params.assoc_id = SCTP_FUTURE_ASSOC; + if (params.assoc_id == SCTP_FUTURE_ASSOC || params.assoc_id == SCTP_ALL_ASSOC) sp->default_rcv_context = params.assoc_value; -- cgit From 746bc215a6b223caf7eb6b33400458c15e742920 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:03 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_MAX_BURST sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_MAX_BURST sockopt. Fixes: e0651a0dc877 ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_MAX_BURST sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b024a625f78a..df879b163414 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3682,6 +3682,9 @@ static int sctp_setsockopt_maxburst(struct sock *sk, return 0; } + if (sctp_style(sk, TCP)) + params.assoc_id = SCTP_FUTURE_ASSOC; + if (params.assoc_id == SCTP_FUTURE_ASSOC || params.assoc_id == SCTP_ALL_ASSOC) sp->max_burst = params.assoc_value; -- cgit From 0685d6b72207a6de7ea6853e48b009e71d64fe1b Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:04 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_AUTH_KEY sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_AUTH_KEY sockopt. Fixes: 7fb3be13a236 ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_AUTH_KEY sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index df879b163414..2ac221c795c2 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3813,6 +3813,9 @@ static int sctp_setsockopt_auth_key(struct sock *sk, goto out; } + if (sctp_style(sk, TCP)) + authkey->sca_assoc_id = SCTP_FUTURE_ASSOC; + if (authkey->sca_assoc_id == SCTP_FUTURE_ASSOC || authkey->sca_assoc_id == SCTP_ALL_ASSOC) { ret = sctp_auth_set_key(ep, asoc, authkey); -- cgit From 06b39e8506f6dd4e11e1d8fc4d314d72d237ad10 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:05 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_AUTH_ACTIVE_KEY sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_AUTH_ACTIVE_KEY sockopt. Fixes: bf9fb6ad4f29 ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_AUTH_ACTIVE_KEY sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 2ac221c795c2..1d098f0ccbb5 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3871,6 +3871,9 @@ static int sctp_setsockopt_active_key(struct sock *sk, if (asoc) return sctp_auth_set_active_key(ep, asoc, val.scact_keynumber); + if (sctp_style(sk, TCP)) + val.scact_assoc_id = SCTP_FUTURE_ASSOC; + if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || val.scact_assoc_id == SCTP_ALL_ASSOC) { ret = sctp_auth_set_active_key(ep, asoc, val.scact_keynumber); -- cgit From 220675eb2e485519afbe7f3b457f7ce883086482 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:06 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_AUTH_DELETE_KEY sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_AUTH_DELETE_KEY sockopt. Fixes: 3adcc300603e ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_AUTH_DELETE_KEY sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1d098f0ccbb5..087ca0bb86bd 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3925,6 +3925,9 @@ static int sctp_setsockopt_del_key(struct sock *sk, if (asoc) return sctp_auth_del_key_id(ep, asoc, val.scact_keynumber); + if (sctp_style(sk, TCP)) + val.scact_assoc_id = SCTP_FUTURE_ASSOC; + if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || val.scact_assoc_id == SCTP_ALL_ASSOC) { ret = sctp_auth_del_key_id(ep, asoc, val.scact_keynumber); -- cgit From 200f3a3bcb293d8d55b860632b9d5c9b5e763273 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:07 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_AUTH_DEACTIVATE_KEY sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_AUTH_DEACTIVATE_KEY sockopt. Fixes: 2af66ff3edc7 ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_AUTH_DEACTIVATE_KEY sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 087ca0bb86bd..0187444567e0 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3978,6 +3978,9 @@ static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval, if (asoc) return sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber); + if (sctp_style(sk, TCP)) + val.scact_assoc_id = SCTP_FUTURE_ASSOC; + if (val.scact_assoc_id == SCTP_FUTURE_ASSOC || val.scact_assoc_id == SCTP_ALL_ASSOC) { ret = sctp_auth_deact_key_id(ep, asoc, val.scact_keynumber); -- cgit From cbb45c6cd5e64c344798892d6e200b0b253d0b59 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:08 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_PRINFO sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_DEFAULT_PRINFO sockopt. Fixes: 3a583059d187 ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_DEFAULT_PRINFO sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 0187444567e0..238e5c804cc9 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4196,6 +4196,9 @@ static int sctp_setsockopt_default_prinfo(struct sock *sk, goto out; } + if (sctp_style(sk, TCP)) + info.pr_assoc_id = SCTP_FUTURE_ASSOC; + if (info.pr_assoc_id == SCTP_FUTURE_ASSOC || info.pr_assoc_id == SCTP_ALL_ASSOC) { SCTP_PR_SET_POLICY(sp->default_flags, info.pr_policy); -- cgit From 9430ff992644e9f9c3ba6283cc56d40b421522e9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:09 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_ENABLE_STREAM_RESET sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_ENABLE_STREAM_RESET sockopt. Fixes: 99a62135e127 ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_ENABLE_STREAM_RESET sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 238e5c804cc9..6b0ee8946bdb 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4281,6 +4281,9 @@ static int sctp_setsockopt_enable_strreset(struct sock *sk, goto out; } + if (sctp_style(sk, TCP)) + params.assoc_id = SCTP_FUTURE_ASSOC; + if (params.assoc_id == SCTP_FUTURE_ASSOC || params.assoc_id == SCTP_ALL_ASSOC) ep->strreset_enable = params.assoc_value; -- cgit From 995186193fd7a21c8fc6a2f2a96d33e26447eb01 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:10 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_EVENT sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_EVENT sockopt. Fixes: d251f05e3ba2 ("sctp: use SCTP_FUTURE_ASSOC and add SCTP_CURRENT_ASSOC for SCTP_EVENT sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6b0ee8946bdb..10df48aa4e2c 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4574,6 +4574,9 @@ static int sctp_setsockopt_event(struct sock *sk, char __user *optval, if (asoc) return sctp_assoc_ulpevent_type_set(¶m, asoc); + if (sctp_style(sk, TCP)) + param.se_assoc_id = SCTP_FUTURE_ASSOC; + if (param.se_assoc_id == SCTP_FUTURE_ASSOC || param.se_assoc_id == SCTP_ALL_ASSOC) sctp_ulpevent_type_set(&sp->subscribe, -- cgit From b59c19d9d901a8eb04896ec027787a55acb71fc6 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Mon, 18 Mar 2019 20:06:11 +0800 Subject: sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_STREAM_SCHEDULER sockopt A similar fix as Patch "sctp: fix ignoring asoc_id for tcp-style sockets on SCTP_DEFAULT_SEND_PARAM sockopt" on SCTP_STREAM_SCHEDULER sockopt. Fixes: 7efba10d6bd2 ("sctp: add SCTP_FUTURE_ASOC and SCTP_CURRENT_ASSOC for SCTP_STREAM_SCHEDULER sockopt") Signed-off-by: Xin Long Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 10df48aa4e2c..011c349d877a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4409,6 +4409,9 @@ static int sctp_setsockopt_scheduler(struct sock *sk, if (asoc) return sctp_sched_set_sched(asoc, params.assoc_value); + if (sctp_style(sk, TCP)) + params.assoc_id = SCTP_FUTURE_ASSOC; + if (params.assoc_id == SCTP_FUTURE_ASSOC || params.assoc_id == SCTP_ALL_ASSOC) sp->default_ss = params.assoc_value; -- cgit From fae846e2b7124d4b076ef17791c73addf3b26350 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Mon, 18 Mar 2019 08:51:06 -0500 Subject: mISDN: hfcpci: Test both vendor & device ID for Digium HFC4S The device ID alone does not uniquely identify a device. Test both the vendor and device ID to make sure we don't mistakenly think some other vendor's 0xB410 device is a Digium HFC4S. Also, instead of the bare hex ID, use the same constant (PCI_DEVICE_ID_DIGIUM_HFC4S) used in the device ID table. No functional change intended. Signed-off-by: Bjorn Helgaas Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/hfcmulti.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c index 4d85645c87f7..0928fd1f0e0c 100644 --- a/drivers/isdn/hardware/mISDN/hfcmulti.c +++ b/drivers/isdn/hardware/mISDN/hfcmulti.c @@ -4365,7 +4365,8 @@ setup_pci(struct hfc_multi *hc, struct pci_dev *pdev, if (m->clock2) test_and_set_bit(HFC_CHIP_CLOCK2, &hc->chip); - if (ent->device == 0xB410) { + if (ent->vendor == PCI_VENDOR_ID_DIGIUM && + ent->device == PCI_DEVICE_ID_DIGIUM_HFC4S) { test_and_set_bit(HFC_CHIP_B410P, &hc->chip); test_and_set_bit(HFC_CHIP_PCM_MASTER, &hc->chip); test_and_clear_bit(HFC_CHIP_PCM_SLAVE, &hc->chip); -- cgit From 12b409dd32dffad6d800774e2d250adaaaa1fdcd Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 18 Mar 2019 16:40:54 +0100 Subject: s390/qeth: don't erase configuration while probing The HW trap and VNICC configuration is exposed via sysfs, and may have already been modified when qeth_l?_probe_device() attempts to initialize them. So (1) initialize the VNICC values a little earlier, and (2) don't bother about the HW trap mode, it was already initialized before. Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l2_main.c | 4 ++-- drivers/s390/net/qeth_l3_main.c | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 8efb2e8ff8f4..3bfdd8545776 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -645,6 +645,8 @@ static int qeth_l2_probe_device(struct ccwgroup_device *gdev) struct qeth_card *card = dev_get_drvdata(&gdev->dev); int rc; + qeth_l2_vnicc_set_defaults(card); + if (gdev->dev.type == &qeth_generic_devtype) { rc = qeth_l2_create_device_attributes(&gdev->dev); if (rc) @@ -652,8 +654,6 @@ static int qeth_l2_probe_device(struct ccwgroup_device *gdev) } hash_init(card->mac_htable); - card->info.hwtrap = 0; - qeth_l2_vnicc_set_defaults(card); return 0; } diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 7e68d9d16859..2f002843b16e 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2260,7 +2260,6 @@ static int qeth_l3_probe_device(struct ccwgroup_device *gdev) } hash_init(card->ip_htable); hash_init(card->ip_mc_htable); - card->info.hwtrap = 0; return 0; } -- cgit From 7221b727f0079a32aca91f657141e1de564d4b97 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 18 Mar 2019 16:40:55 +0100 Subject: s390/qeth: fix race when initializing the IP address table The ucast IP table is utilized by some of the L3-specific sysfs attributes that qeth_l3_create_device_attributes() provides. So initialize the table _before_ registering the attributes. Fixes: ebccc7397e4a ("s390/qeth: add missing hash table initializations") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l3_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 2f002843b16e..d805e72edb58 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2253,12 +2253,14 @@ static int qeth_l3_probe_device(struct ccwgroup_device *gdev) struct qeth_card *card = dev_get_drvdata(&gdev->dev); int rc; + hash_init(card->ip_htable); + if (gdev->dev.type == &qeth_generic_devtype) { rc = qeth_l3_create_device_attributes(&gdev->dev); if (rc) return rc; } - hash_init(card->ip_htable); + hash_init(card->ip_mc_htable); return 0; } -- cgit From 104b48592b5441c722dcd95c38ab9300f2d94856 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 18 Mar 2019 16:40:56 +0100 Subject: s390/qeth: be drop monitor friendly As part of the TX completion path, qeth_release_skbs() frees the completed skbs with __skb_queue_purge(). This ends in kfree_skb(), reporting every completed skb as dropped. On the other hand when dropping an skb in .ndo_start_xmit, we end up calling consume_skb()... where we should be using kfree_skb() so that drop monitors get notified. Switch the drop/consume logic around, and also don't accumulate dropped packets in the tx_errors statistics. Fixes: dc149e3764d8 ("s390/qeth: replace open-coded skb_queue_walk()") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 5 ++++- drivers/s390/net/qeth_l2_main.c | 3 +-- drivers/s390/net/qeth_l3_main.c | 3 +-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 197b0f5b63e7..44bd6f04c145 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -1150,13 +1150,16 @@ static void qeth_notify_skbs(struct qeth_qdio_out_q *q, static void qeth_release_skbs(struct qeth_qdio_out_buffer *buf) { + struct sk_buff *skb; + /* release may never happen from within CQ tasklet scope */ WARN_ON_ONCE(atomic_read(&buf->state) == QETH_QDIO_BUF_IN_CQ); if (atomic_read(&buf->state) == QETH_QDIO_BUF_PENDING) qeth_notify_skbs(buf->q, buf, TX_NOTIFY_GENERALERROR); - __skb_queue_purge(&buf->skb_list); + while ((skb = __skb_dequeue(&buf->skb_list)) != NULL) + consume_skb(skb); } static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue, diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 3bfdd8545776..c3067fd3bd9e 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -629,8 +629,7 @@ static netdev_tx_t qeth_l2_hard_start_xmit(struct sk_buff *skb, } /* else fall through */ QETH_TXQ_STAT_INC(queue, tx_dropped); - QETH_TXQ_STAT_INC(queue, tx_errors); - dev_kfree_skb_any(skb); + kfree_skb(skb); netif_wake_queue(dev); return NETDEV_TX_OK; } diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index d805e72edb58..53712cf26406 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2096,8 +2096,7 @@ static netdev_tx_t qeth_l3_hard_start_xmit(struct sk_buff *skb, tx_drop: QETH_TXQ_STAT_INC(queue, tx_dropped); - QETH_TXQ_STAT_INC(queue, tx_errors); - dev_kfree_skb_any(skb); + kfree_skb(skb); netif_wake_queue(dev); return NETDEV_TX_OK; } -- cgit From 4a9be28c45bf02fa0436808bb6c0baeba30e120e Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 19 Mar 2019 11:33:24 +1100 Subject: NFS: fix mount/umount race in nlmclnt. If the last NFSv3 unmount from a given host races with a mount from the same host, we can destroy an nlm_host that is still in use. Specifically nlmclnt_lookup_host() can increment h_count on an nlm_host that nlmclnt_release_host() has just successfully called refcount_dec_and_test() on. Once nlmclnt_lookup_host() drops the mutex, nlm_destroy_host_lock() will be called to destroy the nlmclnt which is now in use again. The cause of the problem is that the dec_and_test happens outside the locked region. This is easily fixed by using refcount_dec_and_mutex_lock(). Fixes: 8ea6ecc8b075 ("lockd: Create client-side nlm_host cache") Cc: stable@vger.kernel.org (v2.6.38+) Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- fs/lockd/host.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/lockd/host.c b/fs/lockd/host.c index 93fb7cf0b92b..f0b5c987d6ae 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c @@ -290,12 +290,11 @@ void nlmclnt_release_host(struct nlm_host *host) WARN_ON_ONCE(host->h_server); - if (refcount_dec_and_test(&host->h_count)) { + if (refcount_dec_and_mutex_lock(&host->h_count, &nlm_host_mutex)) { WARN_ON_ONCE(!list_empty(&host->h_lockowners)); WARN_ON_ONCE(!list_empty(&host->h_granted)); WARN_ON_ONCE(!list_empty(&host->h_reclaim)); - mutex_lock(&nlm_host_mutex); nlm_destroy_host_locked(host); mutex_unlock(&nlm_host_mutex); } -- cgit From ffa91253739ca89fc997195d8bbd1f7ba3e29fbe Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 18 Mar 2019 11:07:33 -0700 Subject: Documentation: networking: Update netdev-FAQ regarding patches Provide an explanation of what is expected with respect to sending new versions of specific patches within a patch series, as well as what happens if an earlier patch series accidentally gets merged). Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/networking/netdev-FAQ.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Documentation/networking/netdev-FAQ.rst b/Documentation/networking/netdev-FAQ.rst index 0ac5fa77f501..8c7a713cf657 100644 --- a/Documentation/networking/netdev-FAQ.rst +++ b/Documentation/networking/netdev-FAQ.rst @@ -131,6 +131,19 @@ it to the maintainer to figure out what is the most recent and current version that should be applied. If there is any doubt, the maintainer will reply and ask what should be done. +Q: I made changes to only a few patches in a patch series should I resend only those changed? +-------------------------------------------------------------------------------------------- +A: No, please resend the entire patch series and make sure you do number your +patches such that it is clear this is the latest and greatest set of patches +that can be applied. + +Q: I submitted multiple versions of a patch series and it looks like a version other than the last one has been accepted, what should I do? +------------------------------------------------------------------------------------------------------------------------------------------- +A: There is no revert possible, once it is pushed out, it stays like that. +Please send incremental versions on top of what has been merged in order to fix +the patches the way they would look like if your latest patch series was to be +merged. + Q: How can I tell what patches are queued up for backporting to the various stable releases? -------------------------------------------------------------------------------------------- A: Normally Greg Kroah-Hartman collects stable commits himself, but for -- cgit From 744c67ffeb06f2d2493f4049ba0bd19698ce0adf Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Tue, 19 Mar 2019 09:28:43 +0800 Subject: ALSA: hda - Don't trigger jackpoll_work in azx_resume The commit 3baffc4a84d7 (ALSA: hda/intel: Refactoring PM code) changed the behaviour of azx_resume(), it triggers the jackpoll_work after applying this commit. This change introduced a new issue, all codecs are runtime active after S3, and will not call runtime_suspend() automatically. The root cause is the jackpoll_work calls snd_hda_power_up/down_pm, and it calls up_pm before snd_hdac_enter_pm is called, while calls the down_pm in the middle of enter_pm and leave_pm is called. This makes the dev->power.usage_count unbalanced after S3. To fix it, let azx_resume() don't trigger jackpoll_work as before it did. Fixes: 3baffc4a84d7 ("ALSA: hda/intel: Refactoring PM code") Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_intel.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index 4f502c92061f..ece256a3b48f 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -947,7 +947,7 @@ static void __azx_runtime_suspend(struct azx *chip) display_power(chip, false); } -static void __azx_runtime_resume(struct azx *chip) +static void __azx_runtime_resume(struct azx *chip, bool from_rt) { struct hda_intel *hda = container_of(chip, struct hda_intel, chip); struct hdac_bus *bus = azx_bus(chip); @@ -964,7 +964,7 @@ static void __azx_runtime_resume(struct azx *chip) azx_init_pci(chip); hda_intel_init_chip(chip, true); - if (status) { + if (status && from_rt) { list_for_each_codec(codec, &chip->bus) if (status & (1 << codec->addr)) schedule_delayed_work(&codec->jackpoll_work, @@ -1016,7 +1016,7 @@ static int azx_resume(struct device *dev) chip->msi = 0; if (azx_acquire_irq(chip, 1) < 0) return -EIO; - __azx_runtime_resume(chip); + __azx_runtime_resume(chip, false); snd_power_change_state(card, SNDRV_CTL_POWER_D0); trace_azx_resume(chip); @@ -1081,7 +1081,7 @@ static int azx_runtime_resume(struct device *dev) chip = card->private_data; if (!azx_has_pm_runtime(chip)) return 0; - __azx_runtime_resume(chip); + __azx_runtime_resume(chip, true); /* disable controller Wake Up event*/ azx_writew(chip, WAKEEN, azx_readw(chip, WAKEEN) & -- cgit From b5a236c175b0d984552a5f7c9d35141024c2b261 Mon Sep 17 00:00:00 2001 From: Hui Wang Date: Tue, 19 Mar 2019 09:28:44 +0800 Subject: ALSA: hda - Enforces runtime_resume after S3 and S4 for each codec Recently we found the audio jack detection stop working after suspend on many machines with Realtek codec. Sometimes the audio selection dialogue didn't show up after users plugged headhphone/headset into the headset jack, sometimes after uses plugged headphone/headset, then click the sound icon on the upper-right corner of gnome-desktop, it also showed the speaker rather than the headphone. The root cause is that before suspend, the codec already call the runtime_suspend since this codec is not used by any apps, then in resume, it will not call runtime_resume for this codec. But for some realtek codec (so far, alc236, alc255 and alc891) with the specific BIOS, if it doesn't run runtime_resume after suspend, all codec functions including jack detection stop working anymore. This problem existed for a long time, but it was not exposed, that is because when problem happens, if users play sound or open sound-setting to check audio device, this will trigger calling to runtime_resume (via snd_hda_power_up), then the codec starts working again before users notice this problem. Since we don't know how many codec and BIOS combinations have this problem, to fix it, let the driver call runtime_resume for all codecs in pm_resume, maybe for some codecs, this is not needed, but it is harmless. After a codec is runtime resumed, if it is not used by any apps, it will be runtime suspended soon and furthermore we don't run suspend frequently, this change will not add much power consumption. Fixes: cc72da7d4d06 ("ALSA: hda - Use standard runtime PM for codec power-save control") Signed-off-by: Hui Wang Signed-off-by: Takashi Iwai --- sound/pci/hda/hda_codec.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 5f2005098a60..ec0b8595eb4d 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -2939,6 +2939,20 @@ static int hda_codec_runtime_resume(struct device *dev) #endif /* CONFIG_PM */ #ifdef CONFIG_PM_SLEEP +static int hda_codec_force_resume(struct device *dev) +{ + int ret; + + /* The get/put pair below enforces the runtime resume even if the + * device hasn't been used at suspend time. This trick is needed to + * update the jack state change during the sleep. + */ + pm_runtime_get_noresume(dev); + ret = pm_runtime_force_resume(dev); + pm_runtime_put(dev); + return ret; +} + static int hda_codec_pm_suspend(struct device *dev) { dev->power.power_state = PMSG_SUSPEND; @@ -2948,7 +2962,7 @@ static int hda_codec_pm_suspend(struct device *dev) static int hda_codec_pm_resume(struct device *dev) { dev->power.power_state = PMSG_RESUME; - return pm_runtime_force_resume(dev); + return hda_codec_force_resume(dev); } static int hda_codec_pm_freeze(struct device *dev) @@ -2960,13 +2974,13 @@ static int hda_codec_pm_freeze(struct device *dev) static int hda_codec_pm_thaw(struct device *dev) { dev->power.power_state = PMSG_THAW; - return pm_runtime_force_resume(dev); + return hda_codec_force_resume(dev); } static int hda_codec_pm_restore(struct device *dev) { dev->power.power_state = PMSG_RESTORE; - return pm_runtime_force_resume(dev); + return hda_codec_force_resume(dev); } #endif /* CONFIG_PM_SLEEP */ -- cgit From b2d22b6bb33aac10c415e4ba13c8eade201c6f09 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 12 Mar 2019 12:42:37 +0100 Subject: fanotify: Allow copying of file handle to userspace When file handle is embedded inside fanotify_event and usercopy checks are enabled, we get a warning like: Bad or missing usercopy whitelist? Kernel memory exposure attempt detected from SLAB object 'fanotify_event' (offset 40, size 8)! WARNING: CPU: 1 PID: 7649 at mm/usercopy.c:78 usercopy_warn+0xeb/0x110 mm/usercopy.c:78 Annotate handling in fanotify_event properly to mark copying it to userspace is fine. Reported-by: syzbot+2c49971e251e36216d1f@syzkaller.appspotmail.com Fixes: a8b13aa20afb ("fanotify: enable FAN_REPORT_FID init flag") Signed-off-by: Kees Cook Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- fs/notify/fanotify/fanotify_user.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 56992b32c6bb..a90bb19dcfa2 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -208,6 +208,7 @@ static int copy_fid_to_user(struct fanotify_event *event, char __user *buf) { struct fanotify_event_info_fid info = { }; struct file_handle handle = { }; + unsigned char bounce[FANOTIFY_INLINE_FH_LEN], *fh; size_t fh_len = event->fh_len; size_t len = fanotify_event_info_len(event); @@ -233,7 +234,16 @@ static int copy_fid_to_user(struct fanotify_event *event, char __user *buf) buf += sizeof(handle); len -= sizeof(handle); - if (copy_to_user(buf, fanotify_event_fh(event), fh_len)) + /* + * For an inline fh, copy through stack to exclude the copy from + * usercopy hardening protections. + */ + fh = fanotify_event_fh(event); + if (fh_len <= FANOTIFY_INLINE_FH_LEN) { + memcpy(bounce, fh, fh_len); + fh = bounce; + } + if (copy_to_user(buf, fh, fh_len)) return -EFAULT; /* Pad with 0's */ -- cgit From 1a7ee0efb26d6e25433c6d4428028ac614f55ff1 Mon Sep 17 00:00:00 2001 From: Michal Vokáč Date: Fri, 1 Mar 2019 08:26:42 +0100 Subject: ARM: dts: imx6dl-yapp4: Use rgmii-id phy mode on the cpu port MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use rgmii-id phy mode for the CPU port (MAC0) of the QCA8334 switch to add delays to both Tx and Rx clock. It worked with the rgmii mode before because the qca8k driver (incorrectly) enabled delays in that mode and rgmii-id was not implemented at all. Commit 5ecdd77c61c8 ("net: dsa: qca8k: disable delay for RGMII mode") removed the delays from the RGMII mode and hence broke the networking. To fix the problem, commit a968b5e9d587 ("net: dsa: qca8k: Enable delay for RGMII_ID mode") was introduced. Now the correct phy mode is available so use it. Signed-off-by: Michal Vokáč Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6dl-yapp4-common.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi index b715ab0fa1ff..091d829f6b05 100644 --- a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi +++ b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi @@ -125,7 +125,7 @@ ethphy0: port@0 { reg = <0>; label = "cpu"; - phy-mode = "rgmii"; + phy-mode = "rgmii-id"; ethernet = <&fec>; fixed-link { -- cgit From 0c17e83fe423467e3ccf0a02f99bd050a73bbeb4 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Fri, 1 Mar 2019 16:56:46 +0800 Subject: ARM: imx51: fix a leaked reference by adding missing of_node_put The call to of_get_next_child returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./arch/arm/mach-imx/mach-imx51.c:64:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 57, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: Russell King Cc: Shawn Guo Cc: Sascha Hauer Cc: Pengutronix Kernel Team Cc: Fabio Estevam Cc: NXP Linux Team Cc: Lucas Stach Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Shawn Guo --- arch/arm/mach-imx/mach-imx51.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-imx/mach-imx51.c b/arch/arm/mach-imx/mach-imx51.c index c7169c2f94c4..08c7892866c2 100644 --- a/arch/arm/mach-imx/mach-imx51.c +++ b/arch/arm/mach-imx/mach-imx51.c @@ -59,6 +59,7 @@ static void __init imx51_m4if_setup(void) return; m4if_base = of_iomap(np, 0); + of_node_put(np); if (!m4if_base) { pr_err("Unable to map M4IF registers\n"); return; -- cgit From 91740fc8242b4f260cfa4d4536d8551804777fae Mon Sep 17 00:00:00 2001 From: Kohji Okuno Date: Tue, 26 Feb 2019 11:34:13 +0900 Subject: ARM: imx6q: cpuidle: fix bug that CPU might not wake up at expected time In the current cpuidle implementation for i.MX6q, the CPU that sets 'WAIT_UNCLOCKED' and the CPU that returns to 'WAIT_CLOCKED' are always the same. While the CPU that sets 'WAIT_UNCLOCKED' is in IDLE state of "WAIT", if the other CPU wakes up and enters IDLE state of "WFI" istead of "WAIT", this CPU can not wake up at expired time. Because, in the case of "WFI", the CPU must be waked up by the local timer interrupt. But, while 'WAIT_UNCLOCKED' is set, the local timer is stopped, when all CPUs execute "wfi" instruction. As a result, the local timer interrupt is not fired. In this situation, this CPU will wake up by IRQ different from local timer. (e.g. broacast timer) So, this fix changes CPU to return to 'WAIT_CLOCKED'. Signed-off-by: Kohji Okuno Fixes: e5f9dec8ff5f ("ARM: imx6q: support WAIT mode using cpuidle") Cc: Signed-off-by: Shawn Guo --- arch/arm/mach-imx/cpuidle-imx6q.c | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/arch/arm/mach-imx/cpuidle-imx6q.c b/arch/arm/mach-imx/cpuidle-imx6q.c index bfeb25aaf9a2..326e870d7123 100644 --- a/arch/arm/mach-imx/cpuidle-imx6q.c +++ b/arch/arm/mach-imx/cpuidle-imx6q.c @@ -16,30 +16,23 @@ #include "cpuidle.h" #include "hardware.h" -static atomic_t master = ATOMIC_INIT(0); -static DEFINE_SPINLOCK(master_lock); +static int num_idle_cpus = 0; +static DEFINE_SPINLOCK(cpuidle_lock); static int imx6q_enter_wait(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { - if (atomic_inc_return(&master) == num_online_cpus()) { - /* - * With this lock, we prevent other cpu to exit and enter - * this function again and become the master. - */ - if (!spin_trylock(&master_lock)) - goto idle; + spin_lock(&cpuidle_lock); + if (++num_idle_cpus == num_online_cpus()) imx6_set_lpm(WAIT_UNCLOCKED); - cpu_do_idle(); - imx6_set_lpm(WAIT_CLOCKED); - spin_unlock(&master_lock); - goto done; - } + spin_unlock(&cpuidle_lock); -idle: cpu_do_idle(); -done: - atomic_dec(&master); + + spin_lock(&cpuidle_lock); + if (num_idle_cpus-- == num_online_cpus()) + imx6_set_lpm(WAIT_CLOCKED); + spin_unlock(&cpuidle_lock); return index; } -- cgit From d1252f0237238b912c3e7a51bf237acf34c97983 Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Sat, 2 Mar 2019 13:35:53 +0100 Subject: USB: serial: option: add support for Quectel EM12 The Quectel EM12 is a Cat. 12 LTE modem. It behaves in the exactly the same way as the EP06 (including the dynamic configuration behavior), so the same checks on reserved interfaces, etc. are needed. Signed-off-by: Kristian Evensen Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 11b21d9410f3..561d0b641120 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -246,6 +246,7 @@ static void option_instat_callback(struct urb *urb); #define QUECTEL_PRODUCT_EC25 0x0125 #define QUECTEL_PRODUCT_BG96 0x0296 #define QUECTEL_PRODUCT_EP06 0x0306 +#define QUECTEL_PRODUCT_EM12 0x0512 #define CMOTECH_VENDOR_ID 0x16d8 #define CMOTECH_PRODUCT_6001 0x6001 @@ -1087,6 +1088,9 @@ static const struct usb_device_id option_ids[] = { { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff), .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0xff, 0xff), + .driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 }, + { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0, 0) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) }, { USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6003), -- cgit From 422c2537ba9d42320f8ab6573940269f87095320 Mon Sep 17 00:00:00 2001 From: George McCollister Date: Tue, 5 Mar 2019 16:05:03 -0600 Subject: USB: serial: ftdi_sio: add additional NovaTech products Add PIDs for the NovaTech OrionLX+ and Orion I/O so they can be automatically detected. Signed-off-by: George McCollister Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/ftdi_sio.c | 2 ++ drivers/usb/serial/ftdi_sio_ids.h | 4 +++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c index 8f5b17471759..1d8461ae2c34 100644 --- a/drivers/usb/serial/ftdi_sio.c +++ b/drivers/usb/serial/ftdi_sio.c @@ -609,6 +609,8 @@ static const struct usb_device_id id_table_combined[] = { .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLXM_PID), .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk }, + { USB_DEVICE(FTDI_VID, FTDI_NT_ORIONLX_PLUS_PID) }, + { USB_DEVICE(FTDI_VID, FTDI_NT_ORION_IO_PID) }, { USB_DEVICE(FTDI_VID, FTDI_SYNAPSE_SS200_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX_PID) }, { USB_DEVICE(FTDI_VID, FTDI_CUSTOMWARE_MINIPLEX2_PID) }, diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h index b863bedb55a1..5755f0df0025 100644 --- a/drivers/usb/serial/ftdi_sio_ids.h +++ b/drivers/usb/serial/ftdi_sio_ids.h @@ -567,7 +567,9 @@ /* * NovaTech product ids (FTDI_VID) */ -#define FTDI_NT_ORIONLXM_PID 0x7c90 /* OrionLXm Substation Automation Platform */ +#define FTDI_NT_ORIONLXM_PID 0x7c90 /* OrionLXm Substation Automation Platform */ +#define FTDI_NT_ORIONLX_PLUS_PID 0x7c91 /* OrionLX+ Substation Automation Platform */ +#define FTDI_NT_ORION_IO_PID 0x7c92 /* Orion I/O */ /* * Synapse Wireless product ids (FTDI_VID) -- cgit From f8df5c2c3e2df5ffaf9fb5503da93d477a8c7db4 Mon Sep 17 00:00:00 2001 From: Mans Rullgard Date: Tue, 26 Feb 2019 17:07:10 +0000 Subject: USB: serial: option: set driver_info for SIM5218 and compatibles The SIMCom SIM5218 and compatible devices have 5 USB interfaces, only 4 of which are serial ports. The fifth is a network interface supported by the qmi-wwan driver. Furthermore, the serial ports do not support modem control signals. Add driver_info flags to reflect this. Signed-off-by: Mans Rullgard Fixes: ec0cd94d881c ("usb: option: add SIMCom SIM5218") Cc: stable # 3.2 Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index 561d0b641120..db5ece767d59 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1067,7 +1067,8 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(3) }, { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x6613)}, /* Onda H600/ZTE MF330 */ { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x0023)}, /* ONYX 3G device */ - { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000)}, /* SIMCom SIM5218 */ + { USB_DEVICE(QUALCOMM_VENDOR_ID, 0x9000), /* SIMCom SIM5218 */ + .driver_info = NCTRL(0) | NCTRL(1) | NCTRL(2) | NCTRL(3) | RSVD(4) }, /* Quectel products using Qualcomm vendor ID */ { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC15)}, { USB_DEVICE(QUALCOMM_VENDOR_ID, QUECTEL_PRODUCT_UC20), -- cgit From a23314e9d88d89d49e69db08f60b7caa470f04e1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Mar 2019 09:32:02 +0100 Subject: sched/cpufreq: Fix 32-bit math overflow Vincent Wang reported that get_next_freq() has a mult overflow bug on 32-bit platforms in the IOWAIT boost case, since in that case {util,max} are in freq units instead of capacity units. Solve this by moving the IOWAIT boost to capacity units. And since this means @max is constant; simplify the code. Reported-by: Vincent Wang Tested-by: Vincent Wang Signed-off-by: Peter Zijlstra (Intel) Acked-by: Rafael J. Wysocki Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Chunyan Zhang Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Quentin Perret Cc: Rafael J. Wysocki Cc: Rik van Riel Cc: Thomas Gleixner Link: https://lkml.kernel.org/r/20190305083202.GU32494@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/sched/cpufreq_schedutil.c | 59 +++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 34 deletions(-) diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 033ec7c45f13..1ccf77f6d346 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -48,10 +48,10 @@ struct sugov_cpu { bool iowait_boost_pending; unsigned int iowait_boost; - unsigned int iowait_boost_max; u64 last_update; unsigned long bw_dl; + unsigned long min; unsigned long max; /* The field below is for single-CPU policies only: */ @@ -303,8 +303,7 @@ static bool sugov_iowait_reset(struct sugov_cpu *sg_cpu, u64 time, if (delta_ns <= TICK_NSEC) return false; - sg_cpu->iowait_boost = set_iowait_boost - ? sg_cpu->sg_policy->policy->min : 0; + sg_cpu->iowait_boost = set_iowait_boost ? sg_cpu->min : 0; sg_cpu->iowait_boost_pending = set_iowait_boost; return true; @@ -344,14 +343,13 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, /* Double the boost at each request */ if (sg_cpu->iowait_boost) { - sg_cpu->iowait_boost <<= 1; - if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max) - sg_cpu->iowait_boost = sg_cpu->iowait_boost_max; + sg_cpu->iowait_boost = + min_t(unsigned int, sg_cpu->iowait_boost << 1, SCHED_CAPACITY_SCALE); return; } /* First wakeup after IO: start with minimum boost */ - sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min; + sg_cpu->iowait_boost = sg_cpu->min; } /** @@ -373,47 +371,38 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, * This mechanism is designed to boost high frequently IO waiting tasks, while * being more conservative on tasks which does sporadic IO operations. */ -static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time, - unsigned long *util, unsigned long *max) +static unsigned long sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time, + unsigned long util, unsigned long max) { - unsigned int boost_util, boost_max; + unsigned long boost; /* No boost currently required */ if (!sg_cpu->iowait_boost) - return; + return util; /* Reset boost if the CPU appears to have been idle enough */ if (sugov_iowait_reset(sg_cpu, time, false)) - return; + return util; - /* - * An IO waiting task has just woken up: - * allow to further double the boost value - */ - if (sg_cpu->iowait_boost_pending) { - sg_cpu->iowait_boost_pending = false; - } else { + if (!sg_cpu->iowait_boost_pending) { /* - * Otherwise: reduce the boost value and disable it when we - * reach the minimum. + * No boost pending; reduce the boost value. */ sg_cpu->iowait_boost >>= 1; - if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) { + if (sg_cpu->iowait_boost < sg_cpu->min) { sg_cpu->iowait_boost = 0; - return; + return util; } } + sg_cpu->iowait_boost_pending = false; + /* - * Apply the current boost value: a CPU is boosted only if its current - * utilization is smaller then the current IO boost level. + * @util is already in capacity scale; convert iowait_boost + * into the same scale so we can compare. */ - boost_util = sg_cpu->iowait_boost; - boost_max = sg_cpu->iowait_boost_max; - if (*util * boost_max < *max * boost_util) { - *util = boost_util; - *max = boost_max; - } + boost = (sg_cpu->iowait_boost * max) >> SCHED_CAPACITY_SHIFT; + return max(boost, util); } #ifdef CONFIG_NO_HZ_COMMON @@ -460,7 +449,7 @@ static void sugov_update_single(struct update_util_data *hook, u64 time, util = sugov_get_util(sg_cpu); max = sg_cpu->max; - sugov_iowait_apply(sg_cpu, time, &util, &max); + util = sugov_iowait_apply(sg_cpu, time, util, max); next_f = get_next_freq(sg_policy, util, max); /* * Do not reduce the frequency if the CPU has not been idle @@ -500,7 +489,7 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) j_util = sugov_get_util(j_sg_cpu); j_max = j_sg_cpu->max; - sugov_iowait_apply(j_sg_cpu, time, &j_util, &j_max); + j_util = sugov_iowait_apply(j_sg_cpu, time, j_util, j_max); if (j_util * max > j_max * util) { util = j_util; @@ -837,7 +826,9 @@ static int sugov_start(struct cpufreq_policy *policy) memset(sg_cpu, 0, sizeof(*sg_cpu)); sg_cpu->cpu = cpu; sg_cpu->sg_policy = sg_policy; - sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq; + sg_cpu->min = + (SCHED_CAPACITY_SCALE * policy->cpuinfo.min_freq) / + policy->cpuinfo.max_freq; } for_each_cpu(cpu, policy->cpus) { -- cgit From 4c47acd824aaaa8fc6dc519fb4e08d1522105b7a Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Wed, 6 Mar 2019 20:11:42 +0300 Subject: sched/core: Fix buffer overflow in cgroup2 property cpu.max Add limit into sscanf format string for on-stack buffer. Signed-off-by: Konstantin Khlebnikov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Tejun Heo Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: H. Peter Anvin Cc: Johannes Weiner Cc: Li Zefan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Fixes: 0d5936344f30 ("sched: Implement interface for cgroup unified hierarchy") Link: https://lkml.kernel.org/r/155189230232.2620.13120481613524200065.stgit@buzz Signed-off-by: Ingo Molnar --- kernel/sched/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 6b2c055564b5..b7a4afdc33cb 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6943,7 +6943,7 @@ static int __maybe_unused cpu_period_quota_parse(char *buf, { char tok[21]; /* U64_MAX */ - if (!sscanf(buf, "%s %llu", tok, periodp)) + if (sscanf(buf, "%20s %llu", tok, periodp) < 1) return -EINVAL; *periodp *= NSEC_PER_USEC; -- cgit From e25a7a944f1936b5134b7ee06bc432fc701e4aa3 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 11 Feb 2019 17:59:44 +0000 Subject: sched/fair: Comment some nohz_balancer_kick() kick conditions We now have a comment explaining the first sched_domain based NOHZ kick, so might as well comment them all. While at it, unwrap a line that fits under 80 characters. Co-authored-by: Peter Zijlstra Signed-off-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Dietmar.Eggemann@arm.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: morten.rasmussen@arm.com Cc: vincent.guittot@linaro.org Link: https://lkml.kernel.org/r/20190211175946.4961-2-valentin.schneider@arm.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 8213ff6e365d..e6f7d39d4d45 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9612,8 +9612,12 @@ static void nohz_balancer_kick(struct rq *rq) sd = rcu_dereference(rq->sd); if (sd) { - if ((rq->cfs.h_nr_running >= 1) && - check_cpu_capacity(rq, sd)) { + /* + * If there's a CFS task and the current CPU has reduced + * capacity; kick the ILB to see if there's a better CPU to run + * on. + */ + if (rq->cfs.h_nr_running >= 1 && check_cpu_capacity(rq, sd)) { flags = NOHZ_KICK_MASK; goto unlock; } @@ -9621,6 +9625,11 @@ static void nohz_balancer_kick(struct rq *rq) sd = rcu_dereference(per_cpu(sd_asym_packing, cpu)); if (sd) { + /* + * When ASYM_PACKING; see if there's a more preferred CPU + * currently idle; in which case, kick the ILB to move tasks + * around. + */ for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) { if (sched_asym_prefer(i, cpu)) { flags = NOHZ_KICK_MASK; -- cgit From a0fe2cf086aef213d1b4bca1b1291a3dee8357c9 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 11 Feb 2019 17:59:45 +0000 Subject: sched/fair: Tune down misfit NOHZ kicks In this commit: 3b1baa6496e6 ("sched/fair: Add 'group_misfit_task' load-balance type") we set rq->misfit_task_load whenever the current running task has a utilization greater than 80% of rq->cpu_capacity. A non-zero value in this field enables misfit load balancing. However, if the task being looked at is already running on a CPU of highest capacity, there's nothing more we can do for it. We can currently spot this in update_sd_pick_busiest(), which prevents us from selecting a sched_group of group_type == group_misfit_task as the busiest group, but we don't do any of that in nohz_balancer_kick(). This means that we could repeatedly kick NOHZ CPUs when there's no improvements in terms of load balance to be done. Introduce a check_misfit_status() helper that returns true iff there is a CPU in the system that could give more CPU capacity to a rq's misfit task - IOW, there exists a CPU of higher capacity_orig or the rq's CPU is severely pressured by rt/IRQ. Signed-off-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Dietmar.Eggemann@arm.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: morten.rasmussen@arm.com Cc: vincent.guittot@linaro.org Link: https://lkml.kernel.org/r/20190211175946.4961-3-valentin.schneider@arm.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e6f7d39d4d45..f0d2f8a352bf 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -8058,6 +8058,18 @@ check_cpu_capacity(struct rq *rq, struct sched_domain *sd) (rq->cpu_capacity_orig * 100)); } +/* + * Check whether a rq has a misfit task and if it looks like we can actually + * help that task: we can migrate the task to a CPU of higher capacity, or + * the task's current CPU is heavily pressured. + */ +static inline int check_misfit_status(struct rq *rq, struct sched_domain *sd) +{ + return rq->misfit_task_load && + (rq->cpu_capacity_orig < rq->rd->max_cpu_capacity || + check_cpu_capacity(rq, sd)); +} + /* * Group imbalance indicates (and tries to solve) the problem where balancing * groups is inadequate due to ->cpus_allowed constraints. @@ -9585,7 +9597,7 @@ static void nohz_balancer_kick(struct rq *rq) if (time_before(now, nohz.next_balance)) goto out; - if (rq->nr_running >= 2 || rq->misfit_task_load) { + if (rq->nr_running >= 2) { flags = NOHZ_KICK_MASK; goto out; } @@ -9623,6 +9635,18 @@ static void nohz_balancer_kick(struct rq *rq) } } + sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, cpu)); + if (sd) { + /* + * When ASYM_CPUCAPACITY; see if there's a higher capacity CPU + * to run the misfit task on. + */ + if (check_misfit_status(rq, sd)) { + flags = NOHZ_KICK_MASK; + goto unlock; + } + } + sd = rcu_dereference(per_cpu(sd_asym_packing, cpu)); if (sd) { /* -- cgit From b9a7b8831600afc51c9ba52c05f12db2266f01c7 Mon Sep 17 00:00:00 2001 From: Valentin Schneider Date: Mon, 11 Feb 2019 17:59:46 +0000 Subject: sched/fair: Skip LLC NOHZ logic for asymmetric systems The LLC NOHZ condition will become true as soon as >=2 CPUs in a single LLC domain are busy. On big.LITTLE systems, this translates to two or more CPUs of a "cluster" (big or LITTLE) being busy. Issuing a NOHZ kick in these conditions isn't desired for asymmetric systems, as if the busy CPUs can provide enough compute capacity to the running tasks, then we can leave the NOHZ CPUs in peace. Skip the LLC NOHZ condition for asymmetric systems, and rely on nr_running & capacity checks to trigger NOHZ kicks when the system actually needs them. Suggested-by: Morten Rasmussen Signed-off-by: Valentin Schneider Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Dietmar.Eggemann@arm.com Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: vincent.guittot@linaro.org Link: https://lkml.kernel.org/r/20190211175946.4961-4-valentin.schneider@arm.com Signed-off-by: Ingo Molnar --- kernel/sched/fair.c | 65 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index f0d2f8a352bf..51003e1c794d 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -9603,24 +9603,6 @@ static void nohz_balancer_kick(struct rq *rq) } rcu_read_lock(); - sds = rcu_dereference(per_cpu(sd_llc_shared, cpu)); - if (sds) { - /* - * If there is an imbalance between LLC domains (IOW we could - * increase the overall cache use), we need some less-loaded LLC - * domain to pull some load. Likewise, we may need to spread - * load within the current LLC domain (e.g. packed SMT cores but - * other CPUs are idle). We can't really know from here how busy - * the others are - so just get a nohz balance going if it looks - * like this LLC domain has tasks we could move. - */ - nr_busy = atomic_read(&sds->nr_busy_cpus); - if (nr_busy > 1) { - flags = NOHZ_KICK_MASK; - goto unlock; - } - - } sd = rcu_dereference(rq->sd); if (sd) { @@ -9635,6 +9617,21 @@ static void nohz_balancer_kick(struct rq *rq) } } + sd = rcu_dereference(per_cpu(sd_asym_packing, cpu)); + if (sd) { + /* + * When ASYM_PACKING; see if there's a more preferred CPU + * currently idle; in which case, kick the ILB to move tasks + * around. + */ + for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) { + if (sched_asym_prefer(i, cpu)) { + flags = NOHZ_KICK_MASK; + goto unlock; + } + } + } + sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, cpu)); if (sd) { /* @@ -9645,20 +9642,32 @@ static void nohz_balancer_kick(struct rq *rq) flags = NOHZ_KICK_MASK; goto unlock; } + + /* + * For asymmetric systems, we do not want to nicely balance + * cache use, instead we want to embrace asymmetry and only + * ensure tasks have enough CPU capacity. + * + * Skip the LLC logic because it's not relevant in that case. + */ + goto unlock; } - sd = rcu_dereference(per_cpu(sd_asym_packing, cpu)); - if (sd) { + sds = rcu_dereference(per_cpu(sd_llc_shared, cpu)); + if (sds) { /* - * When ASYM_PACKING; see if there's a more preferred CPU - * currently idle; in which case, kick the ILB to move tasks - * around. + * If there is an imbalance between LLC domains (IOW we could + * increase the overall cache use), we need some less-loaded LLC + * domain to pull some load. Likewise, we may need to spread + * load within the current LLC domain (e.g. packed SMT cores but + * other CPUs are idle). We can't really know from here how busy + * the others are - so just get a nohz balance going if it looks + * like this LLC domain has tasks we could move. */ - for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) { - if (sched_asym_prefer(i, cpu)) { - flags = NOHZ_KICK_MASK; - goto unlock; - } + nr_busy = atomic_read(&sds->nr_busy_cpus); + if (nr_busy > 1) { + flags = NOHZ_KICK_MASK; + goto unlock; } } unlock: -- cgit From a3151724437f54076cc10bc02b1c4f0003ae36cd Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 18 Mar 2019 22:24:03 +0100 Subject: x86/mm: Don't leak kernel addresses Since commit: ad67b74d2469d9b8 ("printk: hash addresses printed with %p") at boot "____ptrval____" is printed instead of actual addresses: found SMP MP-table at [mem 0x000f5cc0-0x000f5ccf] mapped at [(____ptrval____)] Instead of changing the print to "%px", and leaking a kernel addresses, just remove the print completely, like in: 071929dbdd865f77 ("arm64: Stop printing the virtual memory layout"). Signed-off-by: Matteo Croce Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/mpparse.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 3482460d984d..1bfe5c6e6cfe 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -598,8 +598,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) mpf_base = base; mpf_found = true; - pr_info("found SMP MP-table at [mem %#010lx-%#010lx] mapped at [%p]\n", - base, base + sizeof(*mpf) - 1, mpf); + pr_info("found SMP MP-table at [mem %#010lx-%#010lx]\n", + base, base + sizeof(*mpf) - 1); memblock_reserve(base, sizeof(*mpf)); if (mpf->physptr) -- cgit From a872fc8bf0304fd56347e94468f07d7e82c679ea Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 13 Feb 2019 00:43:22 +0900 Subject: arm64: kprobes: Move extable address check into arch_prepare_kprobe() Move extable address check into arch_prepare_kprobe() from arch_within_kprobe_blacklist(). The blacklist is exposed via debugfs as a list of symbols. The extable entries are smaller, so must be filtered out by arch_prepare_kprobe(). Acked-by: Will Deacon Reviewed-by: James Morse Signed-off-by: Masami Hiramatsu Signed-off-by: Catalin Marinas --- arch/arm64/kernel/probes/kprobes.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 7fb6f3aa5ceb..16bc3b2e9351 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -102,6 +102,10 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) if (in_exception_text(probe_addr)) return -EINVAL; + + if (search_exception_tables(probe_addr)) + return -EINVAL; + if (probe_addr >= (unsigned long) __start_rodata && probe_addr <= (unsigned long) __end_rodata) return -EINVAL; @@ -485,8 +489,7 @@ bool arch_within_kprobe_blacklist(unsigned long addr) (addr >= (unsigned long)__idmap_text_start && addr < (unsigned long)__idmap_text_end) || (addr >= (unsigned long)__hyp_text_start && - addr < (unsigned long)__hyp_text_end) || - !!search_exception_tables(addr)) + addr < (unsigned long)__hyp_text_end)) return true; if (!is_kernel_in_hyp_mode()) { -- cgit From b5586163de1ce90317cd4037f69b14105be9f656 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 13 Feb 2019 00:43:51 +0900 Subject: arm64: kprobes: Remove unneeded RODATA check Remove unneeded RODATA check from arch_prepare_kprobe(). Since check_kprobe_address_safe() already ensured that the probe address is in kernel text, we don't need to check whether the address in RODATA or not. That must be always false. Acked-by: Will Deacon Signed-off-by: Masami Hiramatsu Signed-off-by: Catalin Marinas --- arch/arm64/kernel/probes/kprobes.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index 16bc3b2e9351..b168873147f5 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -91,8 +91,6 @@ static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs) int __kprobes arch_prepare_kprobe(struct kprobe *p) { unsigned long probe_addr = (unsigned long)p->addr; - extern char __start_rodata[]; - extern char __end_rodata[]; if (probe_addr & 0x3) return -EINVAL; @@ -106,10 +104,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) if (search_exception_tables(probe_addr)) return -EINVAL; - if (probe_addr >= (unsigned long) __start_rodata && - probe_addr <= (unsigned long) __end_rodata) - return -EINVAL; - /* decode instruction */ switch (arm_kprobe_decode_insn(p->addr, &p->ainsn)) { case INSN_REJECTED: /* insn not supported */ -- cgit From 6e08af0f10dcde01f0bdcc64cf91fea9d25e77cc Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 13 Feb 2019 00:44:19 +0900 Subject: arm64: kprobes: Move exception_text check in blacklist Move exception/irqentry text address check in blacklist, since those are symbol based rejection. If we prohibit probing on the symbols in exception_text, those should be blacklisted. Acked-by: Will Deacon Signed-off-by: Masami Hiramatsu Signed-off-by: Catalin Marinas --- arch/arm64/kernel/probes/kprobes.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index b168873147f5..d6697930d075 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -98,9 +98,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) /* copy instruction */ p->opcode = le32_to_cpu(*p->addr); - if (in_exception_text(probe_addr)) - return -EINVAL; - if (search_exception_tables(probe_addr)) return -EINVAL; @@ -483,7 +480,8 @@ bool arch_within_kprobe_blacklist(unsigned long addr) (addr >= (unsigned long)__idmap_text_start && addr < (unsigned long)__idmap_text_end) || (addr >= (unsigned long)__hyp_text_start && - addr < (unsigned long)__hyp_text_end)) + addr < (unsigned long)__hyp_text_end) || + in_exception_text(addr)) return true; if (!is_kernel_in_hyp_mode()) { -- cgit From 6a019a92aa580cd5abdaae578a2a297c9af80174 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 13 Feb 2019 00:44:48 +0900 Subject: arm64: kprobes: Use arch_populate_kprobe_blacklist() Use arch_populate_kprobe_blacklist() instead of arch_within_kprobe_blacklist() so that we can see the full blacklisted symbols under the debugfs. Acked-by: Will Deacon Signed-off-by: Masami Hiramatsu [catalin.marinas@arm.com: Add arch_populate_kprobe_blacklist() comment] Signed-off-by: Catalin Marinas --- arch/arm64/kernel/probes/kprobes.c | 49 +++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index d6697930d075..7a679caf4585 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -471,26 +471,37 @@ kprobe_breakpoint_handler(struct pt_regs *regs, unsigned int esr) return DBG_HOOK_HANDLED; } -bool arch_within_kprobe_blacklist(unsigned long addr) +/* + * Provide a blacklist of symbols identifying ranges which cannot be kprobed. + * This blacklist is exposed to userspace via debugfs (kprobes/blacklist). + */ +int __init arch_populate_kprobe_blacklist(void) { - if ((addr >= (unsigned long)__kprobes_text_start && - addr < (unsigned long)__kprobes_text_end) || - (addr >= (unsigned long)__entry_text_start && - addr < (unsigned long)__entry_text_end) || - (addr >= (unsigned long)__idmap_text_start && - addr < (unsigned long)__idmap_text_end) || - (addr >= (unsigned long)__hyp_text_start && - addr < (unsigned long)__hyp_text_end) || - in_exception_text(addr)) - return true; - - if (!is_kernel_in_hyp_mode()) { - if ((addr >= (unsigned long)__hyp_idmap_text_start && - addr < (unsigned long)__hyp_idmap_text_end)) - return true; - } - - return false; + int ret; + + ret = kprobe_add_area_blacklist((unsigned long)__entry_text_start, + (unsigned long)__entry_text_end); + if (ret) + return ret; + ret = kprobe_add_area_blacklist((unsigned long)__irqentry_text_start, + (unsigned long)__irqentry_text_end); + if (ret) + return ret; + ret = kprobe_add_area_blacklist((unsigned long)__exception_text_start, + (unsigned long)__exception_text_end); + if (ret) + return ret; + ret = kprobe_add_area_blacklist((unsigned long)__idmap_text_start, + (unsigned long)__idmap_text_end); + if (ret) + return ret; + ret = kprobe_add_area_blacklist((unsigned long)__hyp_text_start, + (unsigned long)__hyp_text_end); + if (ret || is_kernel_in_hyp_mode()) + return ret; + ret = kprobe_add_area_blacklist((unsigned long)__hyp_idmap_text_start, + (unsigned long)__hyp_idmap_text_end); + return ret; } void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs) -- cgit From 7ff2c2a1a71e83f74574b8001ea88deb3c166ad7 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 18 Mar 2019 17:45:19 +0200 Subject: btrfs: Fix bound checking in qgroup_trace_new_subtree_blocks If 'cur_level' is 7 then the bound checking at the top of the function will actually pass. Later on, it's possible to dereference ds_path->nodes[cur_level+1] which will be an out of bounds. The correct check will be cur_level >= BTRFS_MAX_LEVEL - 1 . Fixes-coverty-id: 1440918 Fixes-coverty-id: 1440911 Fixes: ea49f3e73c4b ("btrfs: qgroup: Introduce function to find all new tree blocks of reloc tree") CC: stable@vger.kernel.org # 4.20+ Reviewed-by: Qu Wenruo Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/qgroup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index eb680b715dd6..e659d9d61107 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1922,8 +1922,8 @@ static int qgroup_trace_new_subtree_blocks(struct btrfs_trans_handle* trans, int i; /* Level sanity check */ - if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL || - root_level < 0 || root_level >= BTRFS_MAX_LEVEL || + if (cur_level < 0 || cur_level >= BTRFS_MAX_LEVEL - 1 || + root_level < 0 || root_level >= BTRFS_MAX_LEVEL - 1 || root_level < cur_level) { btrfs_err_rl(fs_info, "%s: bad levels, cur_level=%d root_level=%d", -- cgit From 139a56170de67101791d6e6c8e940c6328393fe9 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Mon, 18 Mar 2019 17:45:20 +0200 Subject: btrfs: Avoid possible qgroup_rsv_size overflow in btrfs_calculate_inode_block_rsv_size qgroup_rsv_size is calculated as the product of outstanding_extent * fs_info->nodesize. The product is calculated with 32 bit precision since both variables are defined as u32. Yet qgroup_rsv_size expects a 64 bit result. Avoid possible multiplication overflow by casting outstanding_extent to u64. Such overflow would in the worst case (64K nodesize) require more than 65536 extents, which is quite large and i'ts not likely that it would happen in practice. Fixes-coverity-id: 1435101 Fixes: ff6bc37eb7f6 ("btrfs: qgroup: Use independent and accurate per inode qgroup rsv") CC: stable@vger.kernel.org # 4.19+ Reviewed-by: Qu Wenruo Signed-off-by: Nikolay Borisov Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/extent-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 1d49694e6ae3..c5880329ae37 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -6174,7 +6174,7 @@ static void btrfs_calculate_inode_block_rsv_size(struct btrfs_fs_info *fs_info, * * This is overestimating in most cases. */ - qgroup_rsv_size = outstanding_extents * fs_info->nodesize; + qgroup_rsv_size = (u64)outstanding_extents * fs_info->nodesize; spin_lock(&block_rsv->lock); block_rsv->size = reserve_size; -- cgit From e82adc1074a7356f1158233551df9e86b7ebfb82 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 18 Mar 2019 16:18:30 -0500 Subject: usb: typec: Fix unchecked return value Currently there is no check on platform_get_irq() return value in case it fails, hence never actually reporting any errors and causing unexpected behavior when using such value as argument for function regmap_irq_get_virq(). Fix this by adding a proper check, a message error and return *irq* in case platform_get_irq() fails. Addresses-Coverity-ID: 1443899 ("Improper use of negative value") Fixes: d2061f9cc32d ("usb: typec: add driver for Intel Whiskey Cove PMIC USB Type-C PHY") Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Reviewed-by: Guenter Roeck Acked-by: Heikki Krogerus Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/wcove.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/usb/typec/tcpm/wcove.c b/drivers/usb/typec/tcpm/wcove.c index 423208e19383..6770afd40765 100644 --- a/drivers/usb/typec/tcpm/wcove.c +++ b/drivers/usb/typec/tcpm/wcove.c @@ -615,8 +615,13 @@ static int wcove_typec_probe(struct platform_device *pdev) wcove->dev = &pdev->dev; wcove->regmap = pmic->regmap; - irq = regmap_irq_get_virq(pmic->irq_chip_data_chgr, - platform_get_irq(pdev, 0)); + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(&pdev->dev, "Failed to get IRQ: %d\n", irq); + return irq; + } + + irq = regmap_irq_get_virq(pmic->irq_chip_data_chgr, irq); if (irq < 0) return irq; -- cgit From 40fc165304f0faaae78b761f8ee30b5d216b1850 Mon Sep 17 00:00:00 2001 From: Yasushi Asano Date: Mon, 18 Feb 2019 11:26:34 +0100 Subject: usb: host: xhci-rcar: Add XHCI_TRUST_TX_LENGTH quirk When plugging BUFFALO LUA4-U3-AGT USB3.0 to Gigabit Ethernet LAN Adapter, warning messages filled up dmesg. [ 101.098287] xhci-hcd ee000000.usb: WARN Successful completion on short TX for slot 1 ep 4: needs XHCI_TRUST_TX_LENGTH quirk? [ 101.117463] xhci-hcd ee000000.usb: WARN Successful completion on short TX for slot 1 ep 4: needs XHCI_TRUST_TX_LENGTH quirk? [ 101.136513] xhci-hcd ee000000.usb: WARN Successful completion on short TX for slot 1 ep 4: needs XHCI_TRUST_TX_LENGTH quirk? Adding the XHCI_TRUST_TX_LENGTH quirk resolves the issue. Signed-off-by: Yasushi Asano Signed-off-by: Spyridon Papageorgiou Acked-by: Yoshihiro Shimoda Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-rcar.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/host/xhci-rcar.c b/drivers/usb/host/xhci-rcar.c index a6e463715779..671bce18782c 100644 --- a/drivers/usb/host/xhci-rcar.c +++ b/drivers/usb/host/xhci-rcar.c @@ -246,6 +246,7 @@ int xhci_rcar_init_quirk(struct usb_hcd *hcd) if (!xhci_rcar_wait_for_pll_active(hcd)) return -ETIMEDOUT; + xhci->quirks |= XHCI_TRUST_TX_LENGTH; return xhci_rcar_download_firmware(hcd); } -- cgit From 976daf9d1199932df80e7b04546d1a1bd4ed5ece Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 16 Mar 2019 16:57:12 +0100 Subject: usb: typec: tcpm: Try PD-2.0 if sink does not respond to 3.0 source-caps PD 2.0 sinks are supposed to accept src-capabilities with a 3.0 header and simply ignore any src PDOs which the sink does not understand such as PPS but some 2.0 sinks instead ignore the entire PD_DATA_SOURCE_CAP message, causing contract negotiation to fail. This commit fixes such sinks not working by re-trying the contract negotiation with PD-2.0 source-caps messages if we don't have a contract after PD_N_HARD_RESET_COUNT hard-reset attempts. The problem fixed by this commit was noticed with a Type-C to VGA dongle. Signed-off-by: Hans de Goede Reviewed-by: Guenter Roeck Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/typec/tcpm/tcpm.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 0f62db091d8d..a2233d72ae7c 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -37,6 +37,7 @@ S(SRC_ATTACHED), \ S(SRC_STARTUP), \ S(SRC_SEND_CAPABILITIES), \ + S(SRC_SEND_CAPABILITIES_TIMEOUT), \ S(SRC_NEGOTIATE_CAPABILITIES), \ S(SRC_TRANSITION_SUPPLY), \ S(SRC_READY), \ @@ -2966,10 +2967,34 @@ static void run_state_machine(struct tcpm_port *port) /* port->hard_reset_count = 0; */ port->caps_count = 0; port->pd_capable = true; - tcpm_set_state_cond(port, hard_reset_state(port), + tcpm_set_state_cond(port, SRC_SEND_CAPABILITIES_TIMEOUT, PD_T_SEND_SOURCE_CAP); } break; + case SRC_SEND_CAPABILITIES_TIMEOUT: + /* + * Error recovery for a PD_DATA_SOURCE_CAP reply timeout. + * + * PD 2.0 sinks are supposed to accept src-capabilities with a + * 3.0 header and simply ignore any src PDOs which the sink does + * not understand such as PPS but some 2.0 sinks instead ignore + * the entire PD_DATA_SOURCE_CAP message, causing contract + * negotiation to fail. + * + * After PD_N_HARD_RESET_COUNT hard-reset attempts, we try + * sending src-capabilities with a lower PD revision to + * make these broken sinks work. + */ + if (port->hard_reset_count < PD_N_HARD_RESET_COUNT) { + tcpm_set_state(port, HARD_RESET_SEND, 0); + } else if (port->negotiated_rev > PD_REV20) { + port->negotiated_rev--; + port->hard_reset_count = 0; + tcpm_set_state(port, SRC_SEND_CAPABILITIES, 0); + } else { + tcpm_set_state(port, hard_reset_state(port), 0); + } + break; case SRC_NEGOTIATE_CAPABILITIES: ret = tcpm_pd_check_request(port); if (ret < 0) { -- cgit From 238e0268c82789e4c107a37045d529a6dbce51a9 Mon Sep 17 00:00:00 2001 From: Fabrizio Castro Date: Fri, 1 Mar 2019 11:05:45 +0000 Subject: usb: common: Consider only available nodes for dr_mode There are cases where multiple device tree nodes point to the same phy node by means of the "phys" property, but we should only consider those nodes that are marked as available rather than just any node. Fixes: 98bfb3946695 ("usb: of: add an api to get dr_mode by the phy node") Cc: stable@vger.kernel.org # v4.4+ Signed-off-by: Fabrizio Castro Signed-off-by: Greg Kroah-Hartman --- drivers/usb/common/common.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/usb/common/common.c b/drivers/usb/common/common.c index 48277bbc15e4..73c8e6591746 100644 --- a/drivers/usb/common/common.c +++ b/drivers/usb/common/common.c @@ -145,6 +145,8 @@ enum usb_dr_mode of_usb_get_dr_mode_by_phy(struct device_node *np, int arg0) do { controller = of_find_node_with_property(controller, "phys"); + if (!of_device_is_available(controller)) + continue; index = 0; do { if (arg0 == -1) { -- cgit From 7c9abe12b359d988970c5e38f0e190249e5ae00f Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 19 Mar 2019 13:51:22 +0100 Subject: netfilter: nf_flowtable: remove duplicated transition in diagram No direct transition from prerouting to forward hook, routing lookup needs to happen first. Fixes: 19b351f16fd9 ("netfilter: add flowtable documentation") Signed-off-by: Pablo Neira Ayuso --- Documentation/networking/nf_flowtable.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/networking/nf_flowtable.txt b/Documentation/networking/nf_flowtable.txt index 54128c50d508..ca2136c76042 100644 --- a/Documentation/networking/nf_flowtable.txt +++ b/Documentation/networking/nf_flowtable.txt @@ -44,10 +44,10 @@ including the Netfilter hooks and the flowtable fastpath bypass. / \ / \ |Routing | / \ --> ingress ---> prerouting ---> |decision| | postrouting |--> neigh_xmit \_________/ \__________/ ---------- \____________/ ^ - | ^ | | ^ | - flowtable | | ____\/___ | | - | | | / \ | | - __\/___ | --------->| forward |------------ | + | ^ | ^ | + flowtable | ____\/___ | | + | | / \ | | + __\/___ | | forward |------------ | |-----| | \_________/ | |-----| | 'flow offload' rule | |-----| | adds entry to | -- cgit From 22feda47b574c2854cc1a8447a2ae18598752375 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 18 Mar 2019 09:50:24 -0500 Subject: usb: usb251xb: Remove unnecessary comparison of unsigned integer with >= 0 There is no need to compare *port* with >= 0 because such comparison of an unsigned value is always true. Fix this by removing such comparison. Addresses-Coverity-ID: 1443949 ("Unsigned compared against 0") Fixes: 02a50b875046 ("usb: usb251xb: add usb data lane port swap feature") Signed-off-by: Gustavo A. R. Silva Reviewed-by: Richard Leitner Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/usb251xb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/misc/usb251xb.c b/drivers/usb/misc/usb251xb.c index 4d72b7d1d383..2c8e2cad7e10 100644 --- a/drivers/usb/misc/usb251xb.c +++ b/drivers/usb/misc/usb251xb.c @@ -547,7 +547,7 @@ static int usb251xb_get_ofdata(struct usb251xb *hub, */ hub->port_swap = USB251XB_DEF_PORT_SWAP; of_property_for_each_u32(np, "swap-dx-lanes", prop, p, port) { - if ((port >= 0) && (port <= data->port_cnt)) + if (port <= data->port_cnt) hub->port_swap |= BIT(port); } -- cgit From 32f47179833b63de72427131169809065db6745e Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Mon, 18 Mar 2019 18:50:56 -0500 Subject: serial: mvebu-uart: Fix to avoid a potential NULL pointer dereference of_match_device on failure to find a matching device can return a NULL pointer. The patch checks for such a scenrio and passes the error upstream. Signed-off-by: Aditya Pakki Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mvebu-uart.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c index 231f751d1ef4..7e7b1559fa36 100644 --- a/drivers/tty/serial/mvebu-uart.c +++ b/drivers/tty/serial/mvebu-uart.c @@ -810,6 +810,9 @@ static int mvebu_uart_probe(struct platform_device *pdev) return -EINVAL; } + if (!match) + return -ENODEV; + /* Assume that all UART ports have a DT alias or none has */ id = of_alias_get_id(pdev->dev.of_node, "serial"); if (!pdev->dev.of_node || id < 0) -- cgit From 3a10e3dd52e80b9a97a3346020024d17b2c272d6 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Mon, 18 Mar 2019 18:44:14 -0500 Subject: serial: max310x: Fix to avoid potential NULL pointer dereference of_match_device can return a NULL pointer when matching device is not found. This patch avoids a scenario causing NULL pointer derefernce. Signed-off-by: Aditya Pakki Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/max310x.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c index f5bdde405627..450ba6d7996c 100644 --- a/drivers/tty/serial/max310x.c +++ b/drivers/tty/serial/max310x.c @@ -1415,6 +1415,8 @@ static int max310x_spi_probe(struct spi_device *spi) if (spi->dev.of_node) { const struct of_device_id *of_id = of_match_device(max310x_dt_ids, &spi->dev); + if (!of_id) + return -ENODEV; devtype = (struct max310x_devtype *)of_id->data; } else { -- cgit From c85be041065c0be8bc48eda4c45e0319caf1d0e5 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Fri, 15 Mar 2019 12:16:06 -0500 Subject: tty: atmel_serial: fix a potential NULL pointer dereference In case dmaengine_prep_dma_cyclic fails, the fix returns a proper error code to avoid NULL pointer dereference. Signed-off-by: Kangjie Lu Fixes: 34df42f59a60 ("serial: at91: add rx dma support") Acked-by: Richard Genoud Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 05147fe24343..41b728d223d1 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -1288,6 +1288,10 @@ static int atmel_prepare_rx_dma(struct uart_port *port) sg_dma_len(&atmel_port->sg_rx)/2, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT); + if (!desc) { + dev_err(port->dev, "Preparing DMA cyclic failed\n"); + goto chan_err; + } desc->callback = atmel_complete_rx_dma; desc->callback_param = port; atmel_port->desc_rx = desc; -- cgit From 6734330654dac550f12e932996b868c6d0dcb421 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 14 Mar 2019 02:21:51 -0500 Subject: tty: mxs-auart: fix a potential NULL pointer dereference In case ioremap fails, the fix returns -ENOMEM to avoid NULL pointer dereferences. Multiple places use port.membase. Signed-off-by: Kangjie Lu Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/mxs-auart.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c index 27235a526cce..4c188f4079b3 100644 --- a/drivers/tty/serial/mxs-auart.c +++ b/drivers/tty/serial/mxs-auart.c @@ -1686,6 +1686,10 @@ static int mxs_auart_probe(struct platform_device *pdev) s->port.mapbase = r->start; s->port.membase = ioremap(r->start, resource_size(r)); + if (!s->port.membase) { + ret = -ENOMEM; + goto out_disable_clks; + } s->port.ops = &mxs_auart_ops; s->port.iotype = UPIO_MEM; s->port.fifosize = MXS_AUART_FIFO_SIZE; -- cgit From ac0cdb3d990108df795b676cd0d0e65ac34b2273 Mon Sep 17 00:00:00 2001 From: Mao Wenan Date: Fri, 8 Mar 2019 22:08:31 +0800 Subject: sc16is7xx: missing unregister/delete driver on error in sc16is7xx_init() Add the missing uart_unregister_driver() and i2c_del_driver() before return from sc16is7xx_init() in the error handling case. Signed-off-by: Mao Wenan Reviewed-by: Vladimir Zapolskiy Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sc16is7xx.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index 635178cf3eed..09a183dfc526 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -1507,7 +1507,7 @@ static int __init sc16is7xx_init(void) ret = i2c_add_driver(&sc16is7xx_i2c_uart_driver); if (ret < 0) { pr_err("failed to init sc16is7xx i2c --> %d\n", ret); - return ret; + goto err_i2c; } #endif @@ -1515,10 +1515,18 @@ static int __init sc16is7xx_init(void) ret = spi_register_driver(&sc16is7xx_spi_uart_driver); if (ret < 0) { pr_err("failed to init sc16is7xx spi --> %d\n", ret); - return ret; + goto err_spi; } #endif return ret; + +err_spi: +#ifdef CONFIG_SERIAL_SC16IS7XX_I2C + i2c_del_driver(&sc16is7xx_i2c_uart_driver); +#endif +err_i2c: + uart_unregister_driver(&sc16is7xx_uart); + return ret; } module_init(sc16is7xx_init); -- cgit From c5cbc78acf693f5605d4a85b1327fa7933daf092 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Fri, 8 Mar 2019 11:37:44 -0700 Subject: tty: serial: qcom_geni_serial: Initialize baud in qcom_geni_console_setup When building with -Wsometimes-uninitialized, Clang warns: drivers/tty/serial/qcom_geni_serial.c:1079:6: warning: variable 'baud' is used uninitialized whenever 'if' condition is false [-Wsometimes-uninitialized] It's not wrong; when options is NULL, baud has no default value. Use 9600 as that is a sane default. Link: https://github.com/ClangBuiltLinux/linux/issues/395 Suggested-by: Greg Kroah-Hartman Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/qcom_geni_serial.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c index 3bcec1c20219..35e5f9c5d5be 100644 --- a/drivers/tty/serial/qcom_geni_serial.c +++ b/drivers/tty/serial/qcom_geni_serial.c @@ -1050,7 +1050,7 @@ static int __init qcom_geni_console_setup(struct console *co, char *options) { struct uart_port *uport; struct qcom_geni_serial_port *port; - int baud; + int baud = 9600; int bits = 8; int parity = 'n'; int flow = 'n'; -- cgit From 72ff51d8dd262d1fef25baedc2ac35116435be47 Mon Sep 17 00:00:00 2001 From: Petr Štetiar Date: Wed, 6 Mar 2019 17:54:03 +0100 Subject: serial: ar933x_uart: Fix build failure with disabled console MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Andrey has reported on OpenWrt's bug tracking system[1], that he currently can't use ar93xx_uart as pure serial UART without console (CONFIG_SERIAL_8250_CONSOLE and CONFIG_SERIAL_AR933X_CONSOLE undefined), because compilation ends with following error: ar933x_uart.c: In function 'ar933x_uart_console_write': ar933x_uart.c:550:14: error: 'struct uart_port' has no member named 'sysrq' So this patch moves all the code related to console handling behind series of CONFIG_SERIAL_AR933X_CONSOLE ifdefs. 1. https://bugs.openwrt.org/index.php?do=details&task_id=2152 Cc: Greg Kroah-Hartman Cc: Jiri Slaby Cc: Andrey Batyiev Reported-by: Andrey Batyiev Tested-by: Andrey Batyiev Signed-off-by: Petr Štetiar Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/ar933x_uart.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c index db5df3d54818..3bdd56a1021b 100644 --- a/drivers/tty/serial/ar933x_uart.c +++ b/drivers/tty/serial/ar933x_uart.c @@ -49,11 +49,6 @@ struct ar933x_uart_port { struct clk *clk; }; -static inline bool ar933x_uart_console_enabled(void) -{ - return IS_ENABLED(CONFIG_SERIAL_AR933X_CONSOLE); -} - static inline unsigned int ar933x_uart_read(struct ar933x_uart_port *up, int offset) { @@ -508,6 +503,7 @@ static const struct uart_ops ar933x_uart_ops = { .verify_port = ar933x_uart_verify_port, }; +#ifdef CONFIG_SERIAL_AR933X_CONSOLE static struct ar933x_uart_port * ar933x_console_ports[CONFIG_SERIAL_AR933X_NR_UARTS]; @@ -604,14 +600,7 @@ static struct console ar933x_uart_console = { .index = -1, .data = &ar933x_uart_driver, }; - -static void ar933x_uart_add_console_port(struct ar933x_uart_port *up) -{ - if (!ar933x_uart_console_enabled()) - return; - - ar933x_console_ports[up->port.line] = up; -} +#endif /* CONFIG_SERIAL_AR933X_CONSOLE */ static struct uart_driver ar933x_uart_driver = { .owner = THIS_MODULE, @@ -700,7 +689,9 @@ static int ar933x_uart_probe(struct platform_device *pdev) baud = ar933x_uart_get_baud(port->uartclk, 0, AR933X_UART_MAX_STEP); up->max_baud = min_t(unsigned int, baud, AR933X_UART_MAX_BAUD); - ar933x_uart_add_console_port(up); +#ifdef CONFIG_SERIAL_AR933X_CONSOLE + ar933x_console_ports[up->port.line] = up; +#endif ret = uart_add_one_port(&ar933x_uart_driver, &up->port); if (ret) @@ -749,8 +740,9 @@ static int __init ar933x_uart_init(void) { int ret; - if (ar933x_uart_console_enabled()) - ar933x_uart_driver.cons = &ar933x_uart_console; +#ifdef CONFIG_SERIAL_AR933X_CONSOLE + ar933x_uart_driver.cons = &ar933x_uart_console; +#endif ret = uart_register_driver(&ar933x_uart_driver); if (ret) -- cgit From 93bcefd4c6bad4c69dbc4edcd3fbf774b24d930d Mon Sep 17 00:00:00 2001 From: Hoan Nguyen An Date: Mon, 18 Mar 2019 18:26:32 +0900 Subject: serial: sh-sci: Fix setting SCSCR_TIE while transferring data We disable transmission interrupt (clear SCSCR_TIE) after all data has been transmitted (if uart_circ_empty(xmit)). While transmitting, if the data is still in the tty buffer, re-enable the SCSCR_TIE bit, which was done at sci_start_tx(). This is unnecessary processing, wasting CPU operation if the data transmission length is large. And further, transmit end, FIFO empty bits disabling have also been performed in the step above. Signed-off-by: Hoan Nguyen An Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/sh-sci.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c index 060fcd42b6d5..2d1c626312cd 100644 --- a/drivers/tty/serial/sh-sci.c +++ b/drivers/tty/serial/sh-sci.c @@ -838,19 +838,9 @@ static void sci_transmit_chars(struct uart_port *port) if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) uart_write_wakeup(port); - if (uart_circ_empty(xmit)) { + if (uart_circ_empty(xmit)) sci_stop_tx(port); - } else { - ctrl = serial_port_in(port, SCSCR); - - if (port->type != PORT_SCI) { - serial_port_in(port, SCxSR); /* Dummy read */ - sci_clear_SCxSR(port, SCxSR_TDxE_CLEAR(port)); - } - ctrl |= SCSCR_TIE; - serial_port_out(port, SCSCR, ctrl); - } } /* On SH3, SCIF may read end-of-break as a space->mark char */ -- cgit From 3dbcea54b3ff706c58f8e8d4470f5e5d3d24a6a0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 15 Mar 2019 12:22:36 +0000 Subject: arm64: apply workaround on A64FX v1r0 Fujitsu erratum 010001 applies to A64FX v0r0 and v1r0, and we try to handle either by masking MIDR with MIDR_FUJITSU_ERRATUM_010001_MASK before comparing it to MIDR_FUJITSU_ERRATUM_010001. Unfortunately, MIDR_FUJITSU_ERRATUM_010001 is constructed incorrectly using MIDR_VARIANT(), which is intended to extract the variant field from MIDR_EL1, rather than generate the field in-place. This results in MIDR_FUJITSU_ERRATUM_010001 being all-ones, and we only match A64FX v0r0. This patch uses MIDR_CPU_VAR_REV() to generate an in-place mask for the variant field, ensuring the we match both v0r0 and v1r0. Fixes: 3e32131abc311a5c ("arm64: Add workaround for Fujitsu A64FX erratum 010001") Reported-by: "Okamoto, Takayuki" Signed-off-by: Mark Rutland [catalin.marinas@arm.com: fixed the patch author] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cputype.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 2afb1338b48a..f3b659587a36 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -129,7 +129,7 @@ /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX -#define MIDR_FUJITSU_ERRATUM_010001_MASK (~MIDR_VARIANT(1)) +#define MIDR_FUJITSU_ERRATUM_010001_MASK (~MIDR_CPU_VAR_REV(1, 0)) #define TCR_CLEAR_FUJITSU_ERRATUM_010001 (TCR_NFD1 | TCR_NFD0) #ifndef __ASSEMBLY__ -- cgit From c82fd1e6bd55ecc001e610e5484e292a7d8a39fc Mon Sep 17 00:00:00 2001 From: William Cohen Date: Fri, 1 Mar 2019 15:00:41 -0500 Subject: arm64/stacktrace: Export save_stack_trace_regs() The ARM64 implements the save_stack_trace_regs function, but it is unusable for any diagnostic tooling compiled as a kernel module due the missing EXPORT_SYMBOL_GPL for the function. Export save_stack_trace_regs() to align with other architectures such as s390, openrisc, and powerpc. This is similar to the ARM64 export of save_stack_trace_tsk() added in git commit e27c7fa015d6. Signed-off-by: William Cohen Signed-off-by: Catalin Marinas --- arch/arm64/kernel/stacktrace.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 1a29f2695ff2..d908b5e9e949 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -143,6 +143,7 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } +EXPORT_SYMBOL_GPL(save_stack_trace_regs); static noinline void __save_stack_trace(struct task_struct *tsk, struct stack_trace *trace, unsigned int nosched) -- cgit From efd00c722ca855745fcc35a7e6675b5a782a3fc8 Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Tue, 5 Mar 2019 21:40:57 +0800 Subject: arm64: Add MIDR encoding for HiSilicon Taishan CPUs Adding the MIDR encodings for HiSilicon Taishan v110 CPUs, which is used in Kunpeng ARM64 server SoCs. TSV110 is the abbreviation of Taishan v110. Signed-off-by: Hanjun Guo Reviewed-by: John Garry Reviewed-by: Zhangshaokun Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cputype.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index f3b659587a36..5f1437099b99 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -77,6 +77,7 @@ #define ARM_CPU_IMP_QCOM 0x51 #define ARM_CPU_IMP_NVIDIA 0x4E #define ARM_CPU_IMP_FUJITSU 0x46 +#define ARM_CPU_IMP_HISI 0x48 #define ARM_CPU_PART_AEM_V8 0xD0F #define ARM_CPU_PART_FOUNDATION 0xD00 @@ -107,6 +108,8 @@ #define FUJITSU_CPU_PART_A64FX 0x001 +#define HISI_CPU_PART_TSV110 0xD01 + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) @@ -126,6 +129,7 @@ #define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER) #define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) #define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX) +#define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ #define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX -- cgit From 0ecc471a2cb7d4d386089445a727f47b59dc9b6e Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Tue, 5 Mar 2019 21:40:58 +0800 Subject: arm64: kpti: Whitelist HiSilicon Taishan v110 CPUs HiSilicon Taishan v110 CPUs didn't implement CSV3 field of the ID_AA64PFR0_EL1 and are not susceptible to Meltdown, so whitelist the MIDR in kpti_safe_list[] table. Signed-off-by: Hanjun Guo Reviewed-by: John Garry Reviewed-by: Zhangshaokun Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index e24e94d28767..4061de10cea6 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -963,6 +963,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), + MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), { /* sentinel */ } }; char const *str = "command line option"; -- cgit From 6f40b2a5dac4b448fe1e8d94dc4238cd95cd5e34 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Mar 2019 15:49:53 -0700 Subject: perf list: Filter metrics too When a filter is specified on the command line, filter the metrics too. Before: % perf list foo List of pre-defined events (to be used in -e): Metric Groups: DSB: DSB_Coverage [Fraction of Uops delivered by the DSB (aka Decoded Icache; or Uop Cache)] ... more metrics ... After: % perf list foo List of pre-defined events (to be used in -e): Metric Groups: Signed-off-by: Andi Kleen Acked-by: Jiri Olsa LPU-Reference: 20190314225002.30108-1-andi@firstfloor.org Link: https://lkml.kernel.org/n/tip-1y8oi2s8c4jhjtykgs5zvda1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-list.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index c9f98d00c0e9..a8394b4f1167 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -119,7 +119,7 @@ int cmd_list(int argc, const char **argv) details_flag); print_tracepoint_events(NULL, s, raw_dump); print_sdt_events(NULL, s, raw_dump); - metricgroup__print(true, true, NULL, raw_dump, details_flag); + metricgroup__print(true, true, s, raw_dump, details_flag); free(s); } } -- cgit From 03724b2e9c45d931eff0f304f2d3363ade65ca89 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Mar 2019 15:49:55 -0700 Subject: perf record: Allow to limit number of reported perf.data files When doing long term recording and waiting for some event to snapshot on, we often only care about the last minute or so. The --switch-output command line option supports rotating the perf.data file when the size exceeds a threshold. But the disk would still be filled with unnecessary old files. Add a new option to only keep a number of rotated files, so that the disk space usage can be limited. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa LPU-Reference: 20190314225002.30108-3-andi@firstfloor.org Link: https://lkml.kernel.org/n/tip-y5u2lik0ragt4vlktz6qc9ks@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 4 ++++ tools/perf/builtin-record.c | 30 +++++++++++++++++++++++++++++- tools/perf/util/data.c | 11 +++++------ tools/perf/util/data.h | 2 +- 4 files changed, 39 insertions(+), 8 deletions(-) diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 8f0c2be34848..8fe4dffcadd0 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -495,6 +495,10 @@ overhead. You can still switch them on with: --switch-output --no-no-buildid --no-no-buildid-cache +--switch-max-files=N:: + +When rotating perf.data with --switch-output, only keep N files. + --dry-run:: Parse options then exit. --dry-run can be used to detect errors in cmdline options. diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index a468d882e74f..02d7c40b2d10 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -62,6 +62,9 @@ struct switch_output { unsigned long time; const char *str; bool set; + char **filenames; + int num_files; + int cur_file; }; struct record { @@ -892,6 +895,7 @@ record__switch_output(struct record *rec, bool at_exit) { struct perf_data *data = &rec->data; int fd, err; + char *new_filename; /* Same Size: "2015122520103046"*/ char timestamp[] = "InvalidTimestamp"; @@ -912,7 +916,7 @@ record__switch_output(struct record *rec, bool at_exit) fd = perf_data__switch(data, timestamp, rec->session->header.data_offset, - at_exit); + at_exit, &new_filename); if (fd >= 0 && !at_exit) { rec->bytes_written = 0; rec->session->header.data_size = 0; @@ -922,6 +926,21 @@ record__switch_output(struct record *rec, bool at_exit) fprintf(stderr, "[ perf record: Dump %s.%s ]\n", data->path, timestamp); + if (rec->switch_output.num_files) { + int n = rec->switch_output.cur_file + 1; + + if (n >= rec->switch_output.num_files) + n = 0; + rec->switch_output.cur_file = n; + if (rec->switch_output.filenames[n]) { + remove(rec->switch_output.filenames[n]); + free(rec->switch_output.filenames[n]); + } + rec->switch_output.filenames[n] = new_filename; + } else { + free(new_filename); + } + /* Output tracking events */ if (!at_exit) { record__synthesize(rec, false); @@ -1973,6 +1992,8 @@ static struct option __record_options[] = { &record.switch_output.set, "signal,size,time", "Switch output when receive SIGUSR2 or cross size,time threshold", "signal"), + OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, + "Limit number of switch output generated files"), OPT_BOOLEAN(0, "dry-run", &dry_run, "Parse options then exit"), #ifdef HAVE_AIO_SUPPORT @@ -2059,6 +2080,13 @@ int cmd_record(int argc, const char **argv) alarm(rec->switch_output.time); } + if (rec->switch_output.num_files) { + rec->switch_output.filenames = calloc(sizeof(char *), + rec->switch_output.num_files); + if (!rec->switch_output.filenames) + return -EINVAL; + } + /* * Allow aliases to facilitate the lookup of symbols for address * filters. Refer to auxtrace_parse_filters(). diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index c6b67efea11a..6a64f713710d 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -361,9 +361,9 @@ ssize_t perf_data__write(struct perf_data *data, int perf_data__switch(struct perf_data *data, const char *postfix, - size_t pos, bool at_exit) + size_t pos, bool at_exit, + char **new_filepath) { - char *new_filepath; int ret; if (check_pipe(data)) @@ -371,15 +371,15 @@ int perf_data__switch(struct perf_data *data, if (perf_data__is_read(data)) return -EINVAL; - if (asprintf(&new_filepath, "%s.%s", data->path, postfix) < 0) + if (asprintf(new_filepath, "%s.%s", data->path, postfix) < 0) return -ENOMEM; /* * Only fire a warning, don't return error, continue fill * original file. */ - if (rename(data->path, new_filepath)) - pr_warning("Failed to rename %s to %s\n", data->path, new_filepath); + if (rename(data->path, *new_filepath)) + pr_warning("Failed to rename %s to %s\n", data->path, *new_filepath); if (!at_exit) { close(data->file.fd); @@ -396,7 +396,6 @@ int perf_data__switch(struct perf_data *data, } ret = data->file.fd; out: - free(new_filepath); return ret; } diff --git a/tools/perf/util/data.h b/tools/perf/util/data.h index 6aef8746469f..259868a39019 100644 --- a/tools/perf/util/data.h +++ b/tools/perf/util/data.h @@ -70,7 +70,7 @@ ssize_t perf_data_file__write(struct perf_data_file *file, */ int perf_data__switch(struct perf_data *data, const char *postfix, - size_t pos, bool at_exit); + size_t pos, bool at_exit, char **new_filepath); int perf_data__create_dir(struct perf_data *data, int nr); int perf_data__open_dir(struct perf_data *data); -- cgit From 9496c015ed39ddfce971d63a1442e6d258504a7d Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Tue, 19 Mar 2019 23:05:18 +0800 Subject: blk-mq: remove unused 'nr_expired' from blk_mq_hw_ctx There is no usage of 'nr_expired'. The 'nr_expired' was introduced by commit 1d9bd5161ba3 ("blk-mq: replace timeout synchronization with a RCU and generation based scheme"). Its usage was removed since commit 12f5b9314545 ("blk-mq: Remove generation seqeunce"). Signed-off-by: Dongli Zhang Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b0c814bcc7e3..35359697318b 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -57,7 +57,6 @@ struct blk_mq_hw_ctx { unsigned int queue_num; atomic_t nr_active; - unsigned int nr_expired; struct hlist_node cpuhp_dead; struct kobject kobj; -- cgit From 228de124f290e6b981b2c61fbd78215e11264044 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 19 Mar 2019 08:16:21 -0700 Subject: xfs: dabtree scrub needs to range-check level Make sure scrub's dabtree iterator function checks that we're not going deeper in the stack than our cursor permits. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/scrub/dabtree.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index f1260b4bfdee..90527b094878 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -574,6 +574,11 @@ xchk_da_btree( /* Drill another level deeper. */ blkno = be32_to_cpu(key->before); level++; + if (level >= XFS_DA_NODE_MAXDEPTH) { + /* Too deep! */ + xchk_da_set_corrupt(&ds, level - 1); + break; + } ds.tree_level--; error = xchk_da_btree_block(&ds, level, blkno); if (error) -- cgit From a72e9d8d69e7ca848ddd4c4db72d3ab280c1775d Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 19 Mar 2019 08:16:22 -0700 Subject: xfs: fix btree scrub checking with regards to root-in-inode In xchk_btree_check_owner, we can be passed a null buffer pointer. This should only happen for the root of a root-in-inode btree type, but we should program defensively in case the btree cursor state ever gets screwed up and we get a null buffer anyway. Coverity-id: 1438713 Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/scrub/btree.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c index 6f94d1f7322d..117910db51b8 100644 --- a/fs/xfs/scrub/btree.c +++ b/fs/xfs/scrub/btree.c @@ -415,8 +415,17 @@ xchk_btree_check_owner( struct xfs_btree_cur *cur = bs->cur; struct check_owner *co; - if ((cur->bc_flags & XFS_BTREE_ROOT_IN_INODE) && bp == NULL) + /* + * In theory, xfs_btree_get_block should only give us a null buffer + * pointer for the root of a root-in-inode btree type, but we need + * to check defensively here in case the cursor state is also screwed + * up. + */ + if (bp == NULL) { + if (!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE)) + xchk_btree_set_corrupt(bs->sc, bs->cur, level); return 0; + } /* * We want to cross-reference each btree block with the bnobt -- cgit From 4b0bce30f39b7733420bb8b28e340aa91c219bc1 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 19 Mar 2019 08:16:22 -0700 Subject: xfs: always init bma in xfs_bmapi_write Always init the tp/ip fields of bma in xfs_bmapi_write so that the bmapi_finish at the bottom never trips over null transaction or inode pointers. Coverity-id: 1443964 Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster --- fs/xfs/libxfs/xfs_bmap.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index ae4c3b0d84db..4637ae1ae91c 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -4252,9 +4252,13 @@ xfs_bmapi_write( struct xfs_bmbt_irec *mval, /* output: map values */ int *nmap) /* i/o: mval size/count */ { + struct xfs_bmalloca bma = { + .tp = tp, + .ip = ip, + .total = total, + }; struct xfs_mount *mp = ip->i_mount; struct xfs_ifork *ifp; - struct xfs_bmalloca bma = { NULL }; /* args for xfs_bmap_alloc */ xfs_fileoff_t end; /* end of mapped file region */ bool eof = false; /* after the end of extents */ int error; /* error return */ @@ -4322,10 +4326,6 @@ xfs_bmapi_write( eof = true; if (!xfs_iext_peek_prev_extent(ifp, &bma.icur, &bma.prev)) bma.prev.br_startoff = NULLFILEOFF; - bma.tp = tp; - bma.ip = ip; - bma.total = total; - bma.datatype = 0; bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork); n = 0; -- cgit From c0316470683af0507427c3e0660246feacdf8363 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 7 Mar 2019 13:22:07 +0100 Subject: mt7601u: check chip version on probe Since some USB device IDs are duplicated between mt7601u and mt76x0u devices, check chip version on probe and return error if not match 0x7601. Don't think this is serious issue, probe most likely will fail at some other point for wrong device, but we do not have to configure it if we know is not mt7601u device. Reported-by: Xose Vazquez Perez Signed-off-by: Stanislaw Gruszka Acked-by: Jakub Kicinski Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt7601u/usb.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/wireless/mediatek/mt7601u/usb.c b/drivers/net/wireless/mediatek/mt7601u/usb.c index d8b7863f7926..6ae7f14dc9bf 100644 --- a/drivers/net/wireless/mediatek/mt7601u/usb.c +++ b/drivers/net/wireless/mediatek/mt7601u/usb.c @@ -303,6 +303,10 @@ static int mt7601u_probe(struct usb_interface *usb_intf, mac_rev = mt7601u_rr(dev, MT_MAC_CSR0); dev_info(dev->dev, "ASIC revision: %08x MAC revision: %08x\n", asic_rev, mac_rev); + if ((asic_rev >> 16) != 0x7601) { + ret = -ENODEV; + goto err; + } /* Note: vendor driver skips this check for MT7601U */ if (!(mt7601u_rr(dev, MT_EFUSE_CTRL) & MT_EFUSE_CTRL_SEL)) -- cgit From 40b941611bd4826b7e3e449f738af98ad22e1ce6 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 7 Mar 2019 13:22:21 +0100 Subject: mt76x02u: check chip version on probe Since some USB device IDs are duplicated between mt76x0u, mt7601u and mt76x2u device, check chip version on probe and return error if not match the driver. Don't think this is serious issue, probe most likely will fail at some other point for wrong device, but we do not have to configure it if we know is not our device. Reported-by: Xose Vazquez Perez Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt76x0/usb.c | 10 +++++++--- drivers/net/wireless/mediatek/mt76/mt76x02.h | 7 +++++++ drivers/net/wireless/mediatek/mt76/mt76x2/usb.c | 4 ++++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c index 91718647da02..e5a06f74a6f7 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/usb.c @@ -229,7 +229,7 @@ static int mt76x0u_probe(struct usb_interface *usb_intf, struct usb_device *usb_dev = interface_to_usbdev(usb_intf); struct mt76x02_dev *dev; struct mt76_dev *mdev; - u32 asic_rev, mac_rev; + u32 mac_rev; int ret; mdev = mt76_alloc_device(&usb_intf->dev, sizeof(*dev), &mt76x0u_ops, @@ -262,10 +262,14 @@ static int mt76x0u_probe(struct usb_interface *usb_intf, goto err; } - asic_rev = mt76_rr(dev, MT_ASIC_VERSION); + mdev->rev = mt76_rr(dev, MT_ASIC_VERSION); mac_rev = mt76_rr(dev, MT_MAC_CSR0); dev_info(mdev->dev, "ASIC revision: %08x MAC revision: %08x\n", - asic_rev, mac_rev); + mdev->rev, mac_rev); + if (!is_mt76x0(dev)) { + ret = -ENODEV; + goto err; + } /* Note: vendor driver skips this check for MT76X0U */ if (!(mt76_rr(dev, MT_EFUSE_CTRL) & MT_EFUSE_CTRL_SEL)) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02.h b/drivers/net/wireless/mediatek/mt76/mt76x02.h index b5a36d9fb2bd..07061eb4d1e1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x02.h @@ -192,6 +192,13 @@ void mt76x02_mac_start(struct mt76x02_dev *dev); void mt76x02_init_debugfs(struct mt76x02_dev *dev); +static inline bool is_mt76x0(struct mt76x02_dev *dev) +{ + return mt76_chip(&dev->mt76) == 0x7610 || + mt76_chip(&dev->mt76) == 0x7630 || + mt76_chip(&dev->mt76) == 0x7650; +} + static inline bool is_mt76x2(struct mt76x02_dev *dev) { return mt76_chip(&dev->mt76) == 0x7612 || diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c index 7a5d539873ca..ac0f13d46299 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c @@ -65,6 +65,10 @@ static int mt76x2u_probe(struct usb_interface *intf, mdev->rev = mt76_rr(dev, MT_ASIC_VERSION); dev_info(mdev->dev, "ASIC revision: %08x\n", mdev->rev); + if (!is_mt76x2(dev)) { + err = -ENODEV; + goto err; + } err = mt76x2u_register_device(dev); if (err < 0) -- cgit From f2a00a821aacfa77985e4dbe83ed064c48a21bd5 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Mon, 11 Mar 2019 14:09:53 +0100 Subject: mt76: mt7603: use the correct hweight8() function __sw_hweight8() is only defined if CONFIG_GENERIC_HWEIGHT is enabled. The function that works on all architectures is hweight8(). Signed-off-by: Felix Fietkau Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt7603/beacon.c | 3 +-- drivers/net/wireless/mediatek/mt76/mt7603/init.c | 2 +- drivers/net/wireless/mediatek/mt76/mt7603/mcu.c | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/beacon.c b/drivers/net/wireless/mediatek/mt76/mt7603/beacon.c index afcd86f735b4..4dcb465095d1 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/beacon.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/beacon.c @@ -135,8 +135,7 @@ void mt7603_pre_tbtt_tasklet(unsigned long arg) out: mt76_queue_tx_cleanup(dev, MT_TXQ_BEACON, false); - if (dev->mt76.q_tx[MT_TXQ_BEACON].queued > - __sw_hweight8(dev->beacon_mask)) + if (dev->mt76.q_tx[MT_TXQ_BEACON].queued > hweight8(dev->beacon_mask)) dev->beacon_check++; } diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/init.c b/drivers/net/wireless/mediatek/mt76/mt7603/init.c index 15cc8f33b34d..d54dda67d036 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/init.c @@ -112,7 +112,7 @@ static void mt7603_phy_init(struct mt7603_dev *dev) { int rx_chains = dev->mt76.antenna_mask; - int tx_chains = __sw_hweight8(rx_chains) - 1; + int tx_chains = hweight8(rx_chains) - 1; mt76_rmw(dev, MT_WF_RMAC_RMCR, (MT_WF_RMAC_RMCR_SMPS_MODE | diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7603/mcu.c index 4b0713f1fd5e..d06905ea8cc6 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7603/mcu.c +++ b/drivers/net/wireless/mediatek/mt76/mt7603/mcu.c @@ -433,7 +433,7 @@ int mt7603_mcu_set_channel(struct mt7603_dev *dev) { struct cfg80211_chan_def *chandef = &dev->mt76.chandef; struct ieee80211_hw *hw = mt76_hw(dev); - int n_chains = __sw_hweight8(dev->mt76.antenna_mask); + int n_chains = hweight8(dev->mt76.antenna_mask); struct { u8 control_chan; u8 center_chan; -- cgit From 13f61dfc5235cfa82b1efbcdf4b4f14d9be233da Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Mon, 11 Mar 2019 14:24:35 +0100 Subject: mt76: fix schedule while atomic in mt76x02_reset_state Fix following schedule while atomic in mt76x02_reset_state since synchronize_rcu is run inside a RCU section [44036.944222] mt76x2e 0000:06:00.0: MCU message 31 (seq 3) timed out [44036.944281] BUG: sleeping function called from invalid context at kernel/rcu/tree_exp.h:818 [44036.944284] in_atomic(): 1, irqs_disabled(): 0, pid: 28066, name: kworker/u4:1 [44036.944287] INFO: lockdep is turned off. [44036.944292] CPU: 1 PID: 28066 Comm: kworker/u4:1 Tainted: G W 5.0.0-rc7-wdn-t1+ #7 [44036.944294] Hardware name: Dell Inc. Studio XPS 1340/0K183D, BIOS A11 09/08/2009 [44036.944305] Workqueue: phy1 mt76x02_wdt_work [mt76x02_lib] [44036.944308] Call Trace: [44036.944317] dump_stack+0x67/0x90 [44036.944322] ___might_sleep.cold.88+0x9f/0xaf [44036.944327] rcu_blocking_is_gp+0x13/0x50 [44036.944330] synchronize_rcu+0x17/0x80 [44036.944337] mt76_sta_state+0x138/0x1d0 [mt76] [44036.944349] mt76x02_wdt_work+0x1c9/0x610 [mt76x02_lib] [44036.944355] process_one_work+0x2a5/0x620 [44036.944361] worker_thread+0x35/0x3e0 [44036.944368] kthread+0x11c/0x140 [44036.944376] ret_from_fork+0x3a/0x50 [44036.944384] BUG: scheduling while atomic: kworker/u4:1/28066/0x00000002 [44036.944387] INFO: lockdep is turned off. [44036.944389] Modules linked in: cmac ctr ccm af_packet snd_hda_codec_hdmi Introduce __mt76_sta_remove in order to run sta_remove without holding dev->mutex. Move __mt76_sta_remove outside of RCU section in mt76x02_reset_state Fixes: e4ebb8b403d1 ("mt76: mt76x2: implement full device restart on watchdog reset") Signed-off-by: Lorenzo Bianconi Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mac80211.c | 18 +++++++++++------- drivers/net/wireless/mediatek/mt76/mt76.h | 2 ++ drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c | 19 ++++++++++--------- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c index a033745adb2f..316167404729 100644 --- a/drivers/net/wireless/mediatek/mt76/mac80211.c +++ b/drivers/net/wireless/mediatek/mt76/mac80211.c @@ -679,19 +679,15 @@ out: return ret; } -static void -mt76_sta_remove(struct mt76_dev *dev, struct ieee80211_vif *vif, - struct ieee80211_sta *sta) +void __mt76_sta_remove(struct mt76_dev *dev, struct ieee80211_vif *vif, + struct ieee80211_sta *sta) { struct mt76_wcid *wcid = (struct mt76_wcid *)sta->drv_priv; - int idx = wcid->idx; - int i; + int i, idx = wcid->idx; rcu_assign_pointer(dev->wcid[idx], NULL); synchronize_rcu(); - mutex_lock(&dev->mutex); - if (dev->drv->sta_remove) dev->drv->sta_remove(dev, vif, sta); @@ -699,7 +695,15 @@ mt76_sta_remove(struct mt76_dev *dev, struct ieee80211_vif *vif, for (i = 0; i < ARRAY_SIZE(sta->txq); i++) mt76_txq_remove(dev, sta->txq[i]); mt76_wcid_free(dev->wcid_mask, idx); +} +EXPORT_SYMBOL_GPL(__mt76_sta_remove); +static void +mt76_sta_remove(struct mt76_dev *dev, struct ieee80211_vif *vif, + struct ieee80211_sta *sta) +{ + mutex_lock(&dev->mutex); + __mt76_sta_remove(dev, vif, sta); mutex_unlock(&dev->mutex); } diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h index cb50e96e736b..bcbfd3c4a44b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76.h +++ b/drivers/net/wireless/mediatek/mt76/mt76.h @@ -695,6 +695,8 @@ int mt76_sta_state(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta, enum ieee80211_sta_state old_state, enum ieee80211_sta_state new_state); +void __mt76_sta_remove(struct mt76_dev *dev, struct ieee80211_vif *vif, + struct ieee80211_sta *sta); struct ieee80211_sta *mt76_rx_convert(struct sk_buff *skb); diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c index 9de015bcd4f9..daaed1220147 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c @@ -441,19 +441,23 @@ static void mt76x02_reset_state(struct mt76x02_dev *dev) { int i; + lockdep_assert_held(&dev->mt76.mutex); + clear_bit(MT76_STATE_RUNNING, &dev->mt76.state); rcu_read_lock(); - ieee80211_iter_keys_rcu(dev->mt76.hw, NULL, mt76x02_key_sync, NULL); + rcu_read_unlock(); for (i = 0; i < ARRAY_SIZE(dev->mt76.wcid); i++) { - struct mt76_wcid *wcid = rcu_dereference(dev->mt76.wcid[i]); - struct mt76x02_sta *msta; struct ieee80211_sta *sta; struct ieee80211_vif *vif; + struct mt76x02_sta *msta; + struct mt76_wcid *wcid; void *priv; + wcid = rcu_dereference_protected(dev->mt76.wcid[i], + lockdep_is_held(&dev->mt76.mutex)); if (!wcid) continue; @@ -463,13 +467,10 @@ static void mt76x02_reset_state(struct mt76x02_dev *dev) priv = msta->vif; vif = container_of(priv, struct ieee80211_vif, drv_priv); - mt76_sta_state(dev->mt76.hw, vif, sta, - IEEE80211_STA_NONE, IEEE80211_STA_NOTEXIST); + __mt76_sta_remove(&dev->mt76, vif, sta); memset(msta, 0, sizeof(*msta)); } - rcu_read_unlock(); - dev->vif_mask = 0; dev->beacon_mask = 0; } @@ -489,11 +490,11 @@ static void mt76x02_watchdog_reset(struct mt76x02_dev *dev) for (i = 0; i < ARRAY_SIZE(dev->mt76.napi); i++) napi_disable(&dev->mt76.napi[i]); + mutex_lock(&dev->mt76.mutex); + if (restart) mt76x02_reset_state(dev); - mutex_lock(&dev->mt76.mutex); - if (dev->beacon_mask) mt76_clear(dev, MT_BEACON_TIME_CFG, MT_BEACON_TIME_CFG_BEACON_TX | -- cgit From 7dfc45e6282a7662279d168cc1219929456f8750 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Tue, 12 Mar 2019 13:32:07 +0100 Subject: mt76x02: do not enable RTS/CTS by default My commit 26a7b5473191 ("mt76x02: set protection according to ht operation element") enabled by default RTS/CTS protection for OFDM and CCK traffic, because MT_TX_RTS_CFG_THRESH is configured to non 0xffff by initvals and .set_rts_threshold callback is not called by mac80211 on initialization, only on user request or during ieee80211_reconfig() (suspend/resuem or restart_hw). Enabling RTS/CTS cause some problems when sending probe request frames by hcxdumptool penetration tool, but I expect it can cause other issues on different scenarios. Restore previous setting of RTS/CTS being disabled by default for OFDM/CCK by changing MT_TX_RTS_CFG_THRESH initvals to 0xffff. Fixes: 26a7b5473191 ("mt76x02: set protection according to ht operation element") Signed-off-by: Stanislaw Gruszka Signed-off-by: Kalle Valo --- drivers/net/wireless/mediatek/mt76/mt76x0/initvals.h | 2 +- drivers/net/wireless/mediatek/mt76/mt76x2/init.c | 2 +- drivers/net/wireless/mediatek/mt76/mt76x2/usb_mac.c | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/initvals.h b/drivers/net/wireless/mediatek/mt76/mt76x0/initvals.h index 0290ba5869a5..736f81752b5b 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x0/initvals.h +++ b/drivers/net/wireless/mediatek/mt76/mt76x0/initvals.h @@ -46,7 +46,7 @@ static const struct mt76_reg_pair common_mac_reg_table[] = { { MT_MM20_PROT_CFG, 0x01742004 }, { MT_MM40_PROT_CFG, 0x03f42084 }, { MT_TXOP_CTRL_CFG, 0x0000583f }, - { MT_TX_RTS_CFG, 0x00092b20 }, + { MT_TX_RTS_CFG, 0x00ffff20 }, { MT_EXP_ACK_TIME, 0x002400ca }, { MT_TXOP_HLDR_ET, 0x00000002 }, { MT_XIFS_TIME_CFG, 0x33a41010 }, diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/init.c b/drivers/net/wireless/mediatek/mt76/mt76x2/init.c index f8534362e2c8..a30ef2c5a9db 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/init.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/init.c @@ -106,7 +106,7 @@ void mt76_write_mac_initvals(struct mt76x02_dev *dev) { MT_TX_SW_CFG1, 0x00010000 }, { MT_TX_SW_CFG2, 0x00000000 }, { MT_TXOP_CTRL_CFG, 0x0400583f }, - { MT_TX_RTS_CFG, 0x00100020 }, + { MT_TX_RTS_CFG, 0x00ffff20 }, { MT_TX_TIMEOUT_CFG, 0x000a2290 }, { MT_TX_RETRY_CFG, 0x47f01f0f }, { MT_EXP_ACK_TIME, 0x002c00dc }, diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb_mac.c index 5e84b4535cb1..3b82345756ea 100644 --- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb_mac.c +++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb_mac.c @@ -93,7 +93,6 @@ int mt76x2u_mac_reset(struct mt76x02_dev *dev) mt76_wr(dev, MT_TX_LINK_CFG, 0x1020); mt76_wr(dev, MT_AUTO_RSP_CFG, 0x13); mt76_wr(dev, MT_MAX_LEN_CFG, 0x2f00); - mt76_wr(dev, MT_TX_RTS_CFG, 0x92b20); mt76_wr(dev, MT_WMM_AIFSN, 0x2273); mt76_wr(dev, MT_WMM_CWMIN, 0x2344); -- cgit From 0e3edd94448001a33ed7f8af2179cd4a280348a2 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 19 Mar 2019 23:20:42 +0800 Subject: drivers: base: swnode: Make two functions static Fix sparse warning: drivers/base/swnode.c:475:22: warning: symbol 'software_node_get_parent' was not declared. Should it be static? drivers/base/swnode.c:484:22: warning: symbol 'software_node_get_next_child' was not declared. Should it be static? Signed-off-by: YueHaibing Signed-off-by: Rafael J. Wysocki --- drivers/base/swnode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c index 1fad9291f6aa..7fc5a18e02ad 100644 --- a/drivers/base/swnode.c +++ b/drivers/base/swnode.c @@ -472,7 +472,7 @@ static int software_node_read_string_array(const struct fwnode_handle *fwnode, val, nval); } -struct fwnode_handle * +static struct fwnode_handle * software_node_get_parent(const struct fwnode_handle *fwnode) { struct software_node *swnode = to_software_node(fwnode); @@ -481,7 +481,7 @@ software_node_get_parent(const struct fwnode_handle *fwnode) NULL; } -struct fwnode_handle * +static struct fwnode_handle * software_node_get_next_child(const struct fwnode_handle *fwnode, struct fwnode_handle *child) { -- cgit From 2a95496634a017c19641f26f00907af75b962f01 Mon Sep 17 00:00:00 2001 From: David Arcari Date: Tue, 12 Feb 2019 09:34:39 -0500 Subject: tools/power turbostat: return the exit status of a command turbostat failed to return a non-zero exit status even though the supplied command (turbostat ) failed. Currently when turbostat forks a command it returns zero instead of the actual exit status of the command. Modify the code to return the exit status. Signed-off-by: David Arcari Acked-by: Len Brown Signed-off-by: Rafael J. Wysocki --- tools/power/x86/turbostat/turbostat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9327c0ddc3a5..c3fad065c89c 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -5077,6 +5077,9 @@ int fork_it(char **argv) signal(SIGQUIT, SIG_IGN); if (waitpid(child_pid, &status, 0) == -1) err(status, "waitpid"); + + if (WIFEXITED(status)) + status = WEXITSTATUS(status); } /* * n.b. fork_it() does not check for errors from for_all_cpus() -- cgit From 0cb98abb5bd13b9a636bde603d952d722688b428 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 19 Mar 2019 12:12:13 -0400 Subject: NFSv4.1 don't free interrupted slot on open Allow the async rpc task for finish and update the open state if needed, then free the slot. Otherwise, the async rpc unable to decode the reply. Signed-off-by: Olga Kornievskaia Fixes: ae55e59da0e4 ("pnfs: Don't release the sequence slot...") Cc: stable@vger.kernel.org # v4.18+ Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 6d2812a39287..741ff8c9c6ed 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -2933,7 +2933,8 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata, } out: - nfs4_sequence_free_slot(&opendata->o_res.seq_res); + if (!opendata->cancelled) + nfs4_sequence_free_slot(&opendata->o_res.seq_res); return ret; } -- cgit From ebff0b0e3d3c862c16c487959db5e0d879632559 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 4 Mar 2019 17:37:44 +0000 Subject: KVM: arm64: Reset the PMU in preemptible context We've become very cautious to now always reset the vcpu when nothing is loaded on the physical CPU. To do so, we now disable preemption and do a kvm_arch_vcpu_put() to make sure we have all the state in memory (and that it won't be loaded behind out back). This now causes issues with resetting the PMU, which calls into perf. Perf itself uses mutexes, which clashes with the lack of preemption. It is worth realizing that the PMU is fully emulated, and that no PMU state is ever loaded on the physical CPU. This means we can perfectly reset the PMU outside of the non-preemptible section. Fixes: e761a927bc9a ("KVM: arm/arm64: Reset the VCPU without preemption and vcpu state loaded") Reported-by: Julien Grall Tested-by: Julien Grall Signed-off-by: Marc Zyngier --- arch/arm64/kvm/reset.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index f16a5f8ff2b4..e2a0500cd7a2 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -123,6 +123,9 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) int ret = -EINVAL; bool loaded; + /* Reset PMU outside of the non-preemptible section */ + kvm_pmu_vcpu_reset(vcpu); + preempt_disable(); loaded = (vcpu->cpu != -1); if (loaded) @@ -170,9 +173,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) vcpu->arch.reset_state.reset = false; } - /* Reset PMU */ - kvm_pmu_vcpu_reset(vcpu); - /* Default workaround setup is enabled (if supported) */ if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL) vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG; -- cgit From ca71228b42a96908eca7658861eafacd227856c9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Mar 2019 18:07:50 +0000 Subject: arm64: KVM: Always set ICH_HCR_EL2.EN if GICv4 is enabled The normal interrupt flow is not to enable the vgic when no virtual interrupt is to be injected (i.e. the LRs are empty). But when a guest is likely to use GICv4 for LPIs, we absolutely need to switch it on at all times. Otherwise, VLPIs only get delivered when there is something in the LRs, which doesn't happen very often. Reported-by: Nianyao Tang Tested-by: Shameerali Kolothum Thodi Signed-off-by: Marc Zyngier --- virt/kvm/arm/hyp/vgic-v3-sr.c | 4 ++-- virt/kvm/arm/vgic/vgic.c | 14 ++++++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 264d92da3240..370bd6c5e6cb 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -222,7 +222,7 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) } } - if (used_lrs) { + if (used_lrs || cpu_if->its_vpe.its_vm) { int i; u32 elrsr; @@ -247,7 +247,7 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) u64 used_lrs = vcpu->arch.vgic_cpu.used_lrs; int i; - if (used_lrs) { + if (used_lrs || cpu_if->its_vpe.its_vm) { write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); for (i = 0; i < used_lrs; i++) diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index abd9c7352677..3af69f2a3866 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -867,15 +867,21 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) * either observe the new interrupt before or after doing this check, * and introducing additional synchronization mechanism doesn't change * this. + * + * Note that we still need to go through the whole thing if anything + * can be directly injected (GICv4). */ - if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) + if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) && + !vgic_supports_direct_msis(vcpu->kvm)) return; DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); - raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); - vgic_flush_lr_state(vcpu); - raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); + if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) { + raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); + vgic_flush_lr_state(vcpu); + raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); + } if (can_access_vgic_from_kernel()) vgic_restore_state(vcpu); -- cgit From a6ecfb11bf37743c1ac49b266595582b107b61d4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 19 Mar 2019 12:47:11 +0000 Subject: KVM: arm/arm64: vgic-its: Take the srcu lock when writing to guest memory When halting a guest, QEMU flushes the virtual ITS caches, which amounts to writing to the various tables that the guest has allocated. When doing this, we fail to take the srcu lock, and the kernel shouts loudly if running a lockdep kernel: [ 69.680416] ============================= [ 69.680819] WARNING: suspicious RCU usage [ 69.681526] 5.1.0-rc1-00008-g600025238f51-dirty #18 Not tainted [ 69.682096] ----------------------------- [ 69.682501] ./include/linux/kvm_host.h:605 suspicious rcu_dereference_check() usage! [ 69.683225] [ 69.683225] other info that might help us debug this: [ 69.683225] [ 69.683975] [ 69.683975] rcu_scheduler_active = 2, debug_locks = 1 [ 69.684598] 6 locks held by qemu-system-aar/4097: [ 69.685059] #0: 0000000034196013 (&kvm->lock){+.+.}, at: vgic_its_set_attr+0x244/0x3a0 [ 69.686087] #1: 00000000f2ed935e (&its->its_lock){+.+.}, at: vgic_its_set_attr+0x250/0x3a0 [ 69.686919] #2: 000000005e71ea54 (&vcpu->mutex){+.+.}, at: lock_all_vcpus+0x64/0xd0 [ 69.687698] #3: 00000000c17e548d (&vcpu->mutex){+.+.}, at: lock_all_vcpus+0x64/0xd0 [ 69.688475] #4: 00000000ba386017 (&vcpu->mutex){+.+.}, at: lock_all_vcpus+0x64/0xd0 [ 69.689978] #5: 00000000c2c3c335 (&vcpu->mutex){+.+.}, at: lock_all_vcpus+0x64/0xd0 [ 69.690729] [ 69.690729] stack backtrace: [ 69.691151] CPU: 2 PID: 4097 Comm: qemu-system-aar Not tainted 5.1.0-rc1-00008-g600025238f51-dirty #18 [ 69.691984] Hardware name: rockchip evb_rk3399/evb_rk3399, BIOS 2019.04-rc3-00124-g2feec69fb1 03/15/2019 [ 69.692831] Call trace: [ 69.694072] lockdep_rcu_suspicious+0xcc/0x110 [ 69.694490] gfn_to_memslot+0x174/0x190 [ 69.694853] kvm_write_guest+0x50/0xb0 [ 69.695209] vgic_its_save_tables_v0+0x248/0x330 [ 69.695639] vgic_its_set_attr+0x298/0x3a0 [ 69.696024] kvm_device_ioctl_attr+0x9c/0xd8 [ 69.696424] kvm_device_ioctl+0x8c/0xf8 [ 69.696788] do_vfs_ioctl+0xc8/0x960 [ 69.697128] ksys_ioctl+0x8c/0xa0 [ 69.697445] __arm64_sys_ioctl+0x28/0x38 [ 69.697817] el0_svc_common+0xd8/0x138 [ 69.698173] el0_svc_handler+0x38/0x78 [ 69.698528] el0_svc+0x8/0xc The fix is to obviously take the srcu lock, just like we do on the read side of things since bf308242ab98. One wonders why this wasn't fixed at the same time, but hey... Fixes: bf308242ab98 ("KVM: arm/arm64: VGIC/ITS: protect kvm_read_guest() calls with SRCU lock") Signed-off-by: Marc Zyngier --- arch/arm/include/asm/kvm_mmu.h | 11 +++++++++++ arch/arm64/include/asm/kvm_mmu.h | 11 +++++++++++ virt/kvm/arm/vgic/vgic-its.c | 8 ++++---- virt/kvm/arm/vgic/vgic-v3.c | 4 ++-- 4 files changed, 28 insertions(+), 6 deletions(-) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 2de96a180166..31de4ab93005 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -381,6 +381,17 @@ static inline int kvm_read_guest_lock(struct kvm *kvm, return ret; } +static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, + const void *data, unsigned long len) +{ + int srcu_idx = srcu_read_lock(&kvm->srcu); + int ret = kvm_write_guest(kvm, gpa, data, len); + + srcu_read_unlock(&kvm->srcu, srcu_idx); + + return ret; +} + static inline void *kvm_get_hyp_vector(void) { switch(read_cpuid_part()) { diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index b0742a16c6c9..ebeefcf835e8 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -445,6 +445,17 @@ static inline int kvm_read_guest_lock(struct kvm *kvm, return ret; } +static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, + const void *data, unsigned long len) +{ + int srcu_idx = srcu_read_lock(&kvm->srcu); + int ret = kvm_write_guest(kvm, gpa, data, len); + + srcu_read_unlock(&kvm->srcu, srcu_idx); + + return ret; +} + #ifdef CONFIG_KVM_INDIRECT_VECTORS /* * EL2 vectors can be mapped and rerouted in a number of ways, diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index ab3f47745d9c..c41e11fd841c 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -1919,7 +1919,7 @@ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, ((u64)ite->irq->intid << KVM_ITS_ITE_PINTID_SHIFT) | ite->collection->collection_id; val = cpu_to_le64(val); - return kvm_write_guest(kvm, gpa, &val, ite_esz); + return kvm_write_guest_lock(kvm, gpa, &val, ite_esz); } /** @@ -2066,7 +2066,7 @@ static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev, (itt_addr_field << KVM_ITS_DTE_ITTADDR_SHIFT) | (dev->num_eventid_bits - 1)); val = cpu_to_le64(val); - return kvm_write_guest(kvm, ptr, &val, dte_esz); + return kvm_write_guest_lock(kvm, ptr, &val, dte_esz); } /** @@ -2246,7 +2246,7 @@ static int vgic_its_save_cte(struct vgic_its *its, ((u64)collection->target_addr << KVM_ITS_CTE_RDBASE_SHIFT) | collection->collection_id); val = cpu_to_le64(val); - return kvm_write_guest(its->dev->kvm, gpa, &val, esz); + return kvm_write_guest_lock(its->dev->kvm, gpa, &val, esz); } static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) @@ -2317,7 +2317,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its) */ val = 0; BUG_ON(cte_esz > sizeof(val)); - ret = kvm_write_guest(its->dev->kvm, gpa, &val, cte_esz); + ret = kvm_write_guest_lock(its->dev->kvm, gpa, &val, cte_esz); return ret; } diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 408a78eb6a97..9f87e58dbd4a 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -358,7 +358,7 @@ retry: if (status) { /* clear consumed data */ val &= ~(1 << bit_nr); - ret = kvm_write_guest(kvm, ptr, &val, 1); + ret = kvm_write_guest_lock(kvm, ptr, &val, 1); if (ret) return ret; } @@ -409,7 +409,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm) else val &= ~(1 << bit_nr); - ret = kvm_write_guest(kvm, ptr, &val, 1); + ret = kvm_write_guest_lock(kvm, ptr, &val, 1); if (ret) return ret; } -- cgit From 7494cec6cb3ba7385a6a223b81906384f15aae34 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 19 Mar 2019 12:56:23 +0000 Subject: KVM: arm/arm64: vgic-its: Take the srcu lock when parsing the memslots Calling kvm_is_visible_gfn() implies that we're parsing the memslots, and doing this without the srcu lock is frown upon: [12704.164532] ============================= [12704.164544] WARNING: suspicious RCU usage [12704.164560] 5.1.0-rc1-00008-g600025238f51-dirty #16 Tainted: G W [12704.164573] ----------------------------- [12704.164589] ./include/linux/kvm_host.h:605 suspicious rcu_dereference_check() usage! [12704.164602] other info that might help us debug this: [12704.164616] rcu_scheduler_active = 2, debug_locks = 1 [12704.164631] 6 locks held by qemu-system-aar/13968: [12704.164644] #0: 000000007ebdae4f (&kvm->lock){+.+.}, at: vgic_its_set_attr+0x244/0x3a0 [12704.164691] #1: 000000007d751022 (&its->its_lock){+.+.}, at: vgic_its_set_attr+0x250/0x3a0 [12704.164726] #2: 00000000219d2706 (&vcpu->mutex){+.+.}, at: lock_all_vcpus+0x64/0xd0 [12704.164761] #3: 00000000a760aecd (&vcpu->mutex){+.+.}, at: lock_all_vcpus+0x64/0xd0 [12704.164794] #4: 000000000ef8e31d (&vcpu->mutex){+.+.}, at: lock_all_vcpus+0x64/0xd0 [12704.164827] #5: 000000007a872093 (&vcpu->mutex){+.+.}, at: lock_all_vcpus+0x64/0xd0 [12704.164861] stack backtrace: [12704.164878] CPU: 2 PID: 13968 Comm: qemu-system-aar Tainted: G W 5.1.0-rc1-00008-g600025238f51-dirty #16 [12704.164887] Hardware name: rockchip evb_rk3399/evb_rk3399, BIOS 2019.04-rc3-00124-g2feec69fb1 03/15/2019 [12704.164896] Call trace: [12704.164910] dump_backtrace+0x0/0x138 [12704.164920] show_stack+0x24/0x30 [12704.164934] dump_stack+0xbc/0x104 [12704.164946] lockdep_rcu_suspicious+0xcc/0x110 [12704.164958] gfn_to_memslot+0x174/0x190 [12704.164969] kvm_is_visible_gfn+0x28/0x70 [12704.164980] vgic_its_check_id.isra.0+0xec/0x1e8 [12704.164991] vgic_its_save_tables_v0+0x1ac/0x330 [12704.165001] vgic_its_set_attr+0x298/0x3a0 [12704.165012] kvm_device_ioctl_attr+0x9c/0xd8 [12704.165022] kvm_device_ioctl+0x8c/0xf8 [12704.165035] do_vfs_ioctl+0xc8/0x960 [12704.165045] ksys_ioctl+0x8c/0xa0 [12704.165055] __arm64_sys_ioctl+0x28/0x38 [12704.165067] el0_svc_common+0xd8/0x138 [12704.165078] el0_svc_handler+0x38/0x78 [12704.165089] el0_svc+0x8/0xc Make sure the lock is taken when doing this. Fixes: bf308242ab98 ("KVM: arm/arm64: VGIC/ITS: protect kvm_read_guest() calls with SRCU lock") Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-its.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index c41e11fd841c..fcb2fceaa4a5 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -754,8 +754,9 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, u64 indirect_ptr, type = GITS_BASER_TYPE(baser); phys_addr_t base = GITS_BASER_ADDR_48_to_52(baser); int esz = GITS_BASER_ENTRY_SIZE(baser); - int index; + int index, idx; gfn_t gfn; + bool ret; switch (type) { case GITS_BASER_TYPE_DEVICE: @@ -782,7 +783,8 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, if (eaddr) *eaddr = addr; - return kvm_is_visible_gfn(its->dev->kvm, gfn); + + goto out; } /* calculate and check the index into the 1st level */ @@ -812,7 +814,12 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, if (eaddr) *eaddr = indirect_ptr; - return kvm_is_visible_gfn(its->dev->kvm, gfn); + +out: + idx = srcu_read_lock(&its->dev->kvm->srcu); + ret = kvm_is_visible_gfn(its->dev->kvm, gfn); + srcu_read_unlock(&its->dev->kvm->srcu, idx); + return ret; } static int vgic_its_alloc_collection(struct vgic_its *its, -- cgit From 57aeef7f7a6848024b92c32bf23e8c85a8ac896d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 19 Mar 2019 09:32:36 -0700 Subject: platform/chrome: cros_ec_debugfs: cancel/schedule logging work only if supported The following traceback was reported on ASUS C201, which does not support console logging. ------------[ cut here ]------------ WARNING: CPU: 2 PID: 361 at kernel/workqueue.c:3030 __flush_work+0x38/0x154 Modules linked in: snd_soc_hdmi_codec cros_ec_debugfs cros_ec_sysfs uvcvideo dw_hdmi_cec dw_hdmi_i2s_audio videobuf2_vmalloc cfg80211 gpio_charger rk_crypto rfkill videobuf2_memops videobuf2_v4l2 des_generic videobuf2_common ofpart m25p80 spi_nor tpm_i2c_infineon sbs_battery mtd tpm joydev cros_ec_dev coreboot_table evdev mousedev ip_tables x_tables [last unloaded: brcmutil] CPU: 2 PID: 361 Comm: systemd-sleep Not tainted 5.1.0-rc1-1-ARCH+ #1 Hardware name: Rockchip (Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x7c/0x9c) [] (dump_stack) from [] (__warn+0xd0/0xec) [] (__warn) from [] (warn_slowpath_null+0x38/0x44) [] (warn_slowpath_null) from [] (__flush_work+0x38/0x154) [] (__flush_work) from [] (__cancel_work_timer+0x114/0x1a4) [] (__cancel_work_timer) from [] (cros_ec_debugfs_suspend+0x14/0x1c [cros_ec_debugfs]) [] (cros_ec_debugfs_suspend [cros_ec_debugfs]) from [] (dpm_run_callback+0x64/0xcc) [] (dpm_run_callback) from [] (__device_suspend+0x174/0x3a8) [] (__device_suspend) from [] (dpm_suspend+0x174/0x1e0) [] (dpm_suspend) from [] (suspend_devices_and_enter+0x6c/0x50c) [] (suspend_devices_and_enter) from [] (pm_suspend+0x20c/0x274) [] (pm_suspend) from [] (state_store+0x54/0x88) [] (state_store) from [] (kernfs_fop_write+0x114/0x180) [] (kernfs_fop_write) from [] (__vfs_write+0x1c/0x154) [] (__vfs_write) from [] (vfs_write+0xb8/0x198) [] (vfs_write) from [] (ksys_write+0x3c/0x74) [] (ksys_write) from [] (ret_fast_syscall+0x0/0x4c) Exception stack(0xe9365fa8 to 0xe9365ff0) 5fa0: 00000004 beef8b28 00000004 beef8b28 00000004 00000000 5fc0: 00000004 beef8b28 02319170 00000004 beef8b28 00000004 b6f3d900 beef8b74 5fe0: 0000006c beef8a98 b6c0adac b6c66534 ---[ end trace f4ee5df14e8ea0ec ]--- If console logging is not supported, the work structure is never initialized, resulting in the traceback. Calling cancel/schedule functions conditionally fixes the problem. While at it, also fix error handling in the probe function. Reported-by: Urja Rannikko Cc: Urja Rannikko Fixes: 6fce0a2cf5a05 ("mfd / platform: cros_ec: Move debugfs attributes to its own driver") Signed-off-by: Guenter Roeck Signed-off-by: Benson Leung --- drivers/platform/chrome/cros_ec_debugfs.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/platform/chrome/cros_ec_debugfs.c b/drivers/platform/chrome/cros_ec_debugfs.c index 900c7073c46f..71308766e891 100644 --- a/drivers/platform/chrome/cros_ec_debugfs.c +++ b/drivers/platform/chrome/cros_ec_debugfs.c @@ -440,7 +440,7 @@ static int cros_ec_debugfs_probe(struct platform_device *pd) ret = cros_ec_create_pdinfo(debug_info); if (ret) - goto remove_debugfs; + goto remove_log; ec->debug_info = debug_info; @@ -448,6 +448,8 @@ static int cros_ec_debugfs_probe(struct platform_device *pd) return 0; +remove_log: + cros_ec_cleanup_console_log(debug_info); remove_debugfs: debugfs_remove_recursive(debug_info->dir); return ret; @@ -467,7 +469,8 @@ static int __maybe_unused cros_ec_debugfs_suspend(struct device *dev) { struct cros_ec_dev *ec = dev_get_drvdata(dev); - cancel_delayed_work_sync(&ec->debug_info->log_poll_work); + if (ec->debug_info->log_buffer.buf) + cancel_delayed_work_sync(&ec->debug_info->log_poll_work); return 0; } @@ -476,7 +479,8 @@ static int __maybe_unused cros_ec_debugfs_resume(struct device *dev) { struct cros_ec_dev *ec = dev_get_drvdata(dev); - schedule_delayed_work(&ec->debug_info->log_poll_work, 0); + if (ec->debug_info->log_buffer.buf) + schedule_delayed_work(&ec->debug_info->log_poll_work, 0); return 0; } -- cgit From a80868f398554842b14d07060012c06efb57c456 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Tue, 12 Mar 2019 09:52:51 +0000 Subject: KVM: arm/arm64: Enforce PTE mappings at stage2 when needed commit 6794ad5443a2118 ("KVM: arm/arm64: Fix unintended stage 2 PMD mappings") made the checks to skip huge mappings, stricter. However it introduced a bug where we still use huge mappings, ignoring the flag to use PTE mappings, by not reseting the vma_pagesize to PAGE_SIZE. Also, the checks do not cover the PUD huge pages, that was under review during the same period. This patch fixes both the issues. Fixes : 6794ad5443a2118 ("KVM: arm/arm64: Fix unintended stage 2 PMD mappings") Reported-by: Zenghui Yu Cc: Zenghui Yu Cc: Christoffer Dall Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 43 +++++++++++++++++++++---------------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index ffd7acdceac7..bcdf978c0d1d 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1594,8 +1594,9 @@ static void kvm_send_hwpoison_signal(unsigned long address, send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current); } -static bool fault_supports_stage2_pmd_mappings(struct kvm_memory_slot *memslot, - unsigned long hva) +static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot, + unsigned long hva, + unsigned long map_size) { gpa_t gpa_start; hva_t uaddr_start, uaddr_end; @@ -1610,34 +1611,34 @@ static bool fault_supports_stage2_pmd_mappings(struct kvm_memory_slot *memslot, /* * Pages belonging to memslots that don't have the same alignment - * within a PMD for userspace and IPA cannot be mapped with stage-2 - * PMD entries, because we'll end up mapping the wrong pages. + * within a PMD/PUD for userspace and IPA cannot be mapped with stage-2 + * PMD/PUD entries, because we'll end up mapping the wrong pages. * * Consider a layout like the following: * * memslot->userspace_addr: * +-----+--------------------+--------------------+---+ - * |abcde|fgh Stage-1 PMD | Stage-1 PMD tv|xyz| + * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz| * +-----+--------------------+--------------------+---+ * * memslot->base_gfn << PAGE_SIZE: * +---+--------------------+--------------------+-----+ - * |abc|def Stage-2 PMD | Stage-2 PMD |tvxyz| + * |abc|def Stage-2 block | Stage-2 block |tvxyz| * +---+--------------------+--------------------+-----+ * - * If we create those stage-2 PMDs, we'll end up with this incorrect + * If we create those stage-2 blocks, we'll end up with this incorrect * mapping: * d -> f * e -> g * f -> h */ - if ((gpa_start & ~S2_PMD_MASK) != (uaddr_start & ~S2_PMD_MASK)) + if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1))) return false; /* * Next, let's make sure we're not trying to map anything not covered - * by the memslot. This means we have to prohibit PMD size mappings - * for the beginning and end of a non-PMD aligned and non-PMD sized + * by the memslot. This means we have to prohibit block size mappings + * for the beginning and end of a non-block aligned and non-block sized * memory slot (illustrated by the head and tail parts of the * userspace view above containing pages 'abcde' and 'xyz', * respectively). @@ -1646,8 +1647,8 @@ static bool fault_supports_stage2_pmd_mappings(struct kvm_memory_slot *memslot, * userspace_addr or the base_gfn, as both are equally aligned (per * the check above) and equally sized. */ - return (hva & S2_PMD_MASK) >= uaddr_start && - (hva & S2_PMD_MASK) + S2_PMD_SIZE <= uaddr_end; + return (hva & ~(map_size - 1)) >= uaddr_start && + (hva & ~(map_size - 1)) + map_size <= uaddr_end; } static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, @@ -1676,12 +1677,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, return -EFAULT; } - if (!fault_supports_stage2_pmd_mappings(memslot, hva)) - force_pte = true; - - if (logging_active) - force_pte = true; - /* Let's check if we will get back a huge page backed by hugetlbfs */ down_read(¤t->mm->mmap_sem); vma = find_vma_intersection(current->mm, hva, hva + 1); @@ -1692,6 +1687,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } vma_pagesize = vma_kernel_pagesize(vma); + if (logging_active || + !fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) { + force_pte = true; + vma_pagesize = PAGE_SIZE; + } + /* * The stage2 has a minimum of 2 level table (For arm64 see * kvm_arm_setup_stage2()). Hence, we are guaranteed that we can @@ -1699,11 +1700,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * As for PUD huge maps, we must make sure that we have at least * 3 levels, i.e, PMD is not folded. */ - if ((vma_pagesize == PMD_SIZE || - (vma_pagesize == PUD_SIZE && kvm_stage2_has_pmd(kvm))) && - !force_pte) { + if (vma_pagesize == PMD_SIZE || + (vma_pagesize == PUD_SIZE && kvm_stage2_has_pmd(kvm))) gfn = (fault_ipa & huge_page_mask(hstate_vma(vma))) >> PAGE_SHIFT; - } up_read(¤t->mm->mmap_sem); /* We need minimum second+third level pages */ -- cgit From db983f6eef57a9d78af79bc32389b7e60eb3c47d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 18 Mar 2019 09:29:26 -0700 Subject: scsi: core: Also call destroy_rcu_head() for passthrough requests cmd->rcu is initialized by scsi_initialize_rq(). For passthrough requests, blk_get_request() calls scsi_initialize_rq(). For filesystem requests, scsi_init_command() calls scsi_initialize_rq(). Make sure that destroy_rcu_head() is called for passthrough requests. Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Ewan D. Milne Cc: Johannes Thumshirn Reported-by: Ewan D. Milne Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 20189675677a..f0db2dd0be75 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -585,9 +585,16 @@ static bool scsi_end_request(struct request *req, blk_status_t error, if (!blk_rq_is_scsi(req)) { WARN_ON_ONCE(!(cmd->flags & SCMD_INITIALIZED)); cmd->flags &= ~SCMD_INITIALIZED; - destroy_rcu_head(&cmd->rcu); } + /* + * Calling rcu_barrier() is not necessary here because the + * SCSI error handler guarantees that the function called by + * call_rcu() has been called before scsi_end_request() is + * called. + */ + destroy_rcu_head(&cmd->rcu); + /* * In the MQ case the command gets freed by __blk_mq_end_request, * so we have to do all cleanup that depends on it earlier. -- cgit From 17605afaae825b0291f80c62a7f6565879edaa8a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 15 Mar 2019 16:27:58 -0700 Subject: scsi: core: Avoid that a kernel warning appears during system resume Since scsi_device_quiesce() skips SCSI devices that have another state than RUNNING, OFFLINE or TRANSPORT_OFFLINE, scsi_device_resume() should not complain about SCSI devices that have been skipped. Hence this patch. This patch avoids that the following warning appears during resume: WARNING: CPU: 3 PID: 1039 at blk_clear_pm_only+0x2a/0x30 CPU: 3 PID: 1039 Comm: kworker/u8:49 Not tainted 5.0.0+ #1 Hardware name: LENOVO 4180F42/4180F42, BIOS 83ET75WW (1.45 ) 05/10/2013 Workqueue: events_unbound async_run_entry_fn RIP: 0010:blk_clear_pm_only+0x2a/0x30 Call Trace: ? scsi_device_resume+0x28/0x50 ? scsi_dev_type_resume+0x2b/0x80 ? async_run_entry_fn+0x2c/0xd0 ? process_one_work+0x1f0/0x3f0 ? worker_thread+0x28/0x3c0 ? process_one_work+0x3f0/0x3f0 ? kthread+0x10c/0x130 ? __kthread_create_on_node+0x150/0x150 ? ret_from_fork+0x1f/0x30 Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Ming Lei Cc: Johannes Thumshirn Cc: Oleksandr Natalenko Cc: Martin Steigerwald Cc: Reported-by: Jisheng Zhang Tested-by: Jisheng Zhang Fixes: 3a0a529971ec ("block, scsi: Make SCSI quiesce and resume work reliably") # v4.15 Signed-off-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_lib.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index f0db2dd0be75..601b9f1de267 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2548,8 +2548,10 @@ void scsi_device_resume(struct scsi_device *sdev) * device deleted during suspend) */ mutex_lock(&sdev->state_mutex); - sdev->quiesced_by = NULL; - blk_clear_pm_only(sdev->request_queue); + if (sdev->quiesced_by) { + sdev->quiesced_by = NULL; + blk_clear_pm_only(sdev->request_queue); + } if (sdev->sdev_state == SDEV_QUIESCE) scsi_device_set_state(sdev, SDEV_RUNNING); mutex_unlock(&sdev->state_mutex); -- cgit From 4705f10e82c63924bd84a9b31d15839ec9ba3d06 Mon Sep 17 00:00:00 2001 From: Quinn Tran Date: Fri, 15 Mar 2019 15:04:18 -0700 Subject: scsi: qla2xxx: Fix FC-AL connection target discovery Commit 7f147f9bfd44 ("scsi: qla2xxx: Fix N2N target discovery with Local loop") fixed N2N target discovery for local loop. However, same code is used for FC-AL discovery as well. Added check to make sure we are bypassing area and domain check only in N2N topology for target discovery. Fixes: 7f147f9bfd44 ("scsi: qla2xxx: Fix N2N target discovery with Local loop") Cc: stable@vger.kernel.org # 5.0+ Signed-off-by: Quinn Tran Signed-off-by: Himanshu Madhani Reviewed-by: Ewan D. Milne Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_init.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c index 420045155ba0..0c700b140ce7 100644 --- a/drivers/scsi/qla2xxx/qla_init.c +++ b/drivers/scsi/qla2xxx/qla_init.c @@ -4991,6 +4991,13 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha) if ((domain & 0xf0) == 0xf0) continue; + /* Bypass if not same domain and area of adapter. */ + if (area && domain && ((area != vha->d_id.b.area) || + (domain != vha->d_id.b.domain)) && + (ha->current_topology == ISP_CFG_NL)) + continue; + + /* Bypass invalid local loop ID. */ if (loop_id > LAST_LOCAL_LOOP_ID) continue; -- cgit From ac444b4f0ace05d7c4c99f6b1e5b0cae0852f025 Mon Sep 17 00:00:00 2001 From: Himanshu Madhani Date: Fri, 15 Mar 2019 15:04:19 -0700 Subject: scsi: qla2xxx: Fix NULL pointer crash due to stale CPUID This patch fixes crash due to NULL pointer derefrence because CPU pointer is not set and used by driver. Instead, driver is passes CPU as tag via ha->isp_ops->{lun_reset|target_reset} [ 30.160780] qla2xxx [0000:a0:00.1]-8038:9: Cable is unplugged... [ 69.984045] qla2xxx [0000:a0:00.0]-8009:8: DEVICE RESET ISSUED nexus=8:0:0 cmd=00000000b0d62f46. [ 69.992849] BUG: unable to handle kernel NULL pointer dereference at 0000000000000040 [ 70.000680] PGD 0 P4D 0 [ 70.003232] Oops: 0000 [#1] SMP PTI [ 70.006727] CPU: 2 PID: 6714 Comm: sg_reset Kdump: loaded Not tainted 4.18.0-67.el8.x86_64 #1 [ 70.015258] Hardware name: NEC Express5800/T110j [N8100-2758Y]/MX32-PH0-NJ, BIOS F11 02/13/2019 [ 70.024016] RIP: 0010:blk_mq_rq_cpu+0x9/0x10 [ 70.028315] Code: 01 58 01 00 00 48 83 c0 28 48 3d 80 02 00 00 75 ab c3 0f 1f 44 00 00 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 48 8b 47 08 <8b> 40 40 c3 0f 1f 00 0f 1f 44 00 00 48 83 ec 10 48 c7 c6 20 6e 7c [ 70.047087] RSP: 0018:ffff99a481487d58 EFLAGS: 00010246 [ 70.052322] RAX: 0000000000000000 RBX: ffffffffc041b08b RCX: 0000000000000000 [ 70.059466] RDX: 0000000000000000 RSI: ffff8d10b6b16898 RDI: ffff8d10b341e400 [ 70.066615] RBP: ffffffffc03a6bd0 R08: 0000000000000415 R09: 0000000000aaaaaa [ 70.073765] R10: 0000000000000001 R11: 0000000000000001 R12: ffff8d10b341e528 [ 70.080914] R13: ffff8d10aadefc00 R14: ffff8d0f64efa998 R15: ffff8d0f64efa000 [ 70.088083] FS: 00007f90a201e540(0000) GS:ffff8d10b6b00000(0000) knlGS:0000000000000000 [ 70.096188] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 70.101959] CR2: 0000000000000040 CR3: 0000000268886005 CR4: 00000000003606e0 [ 70.109127] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 70.116277] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 70.123425] Call Trace: [ 70.125896] __qla2xxx_eh_generic_reset+0xb1/0x220 [qla2xxx] [ 70.131572] scsi_ioctl_reset+0x1f5/0x2a0 [ 70.135600] scsi_ioctl+0x18e/0x397 [ 70.139099] ? sd_ioctl+0x7c/0x100 [sd_mod] [ 70.143287] blkdev_ioctl+0x32b/0x9f0 [ 70.146954] ? __check_object_size+0xa3/0x181 [ 70.151323] block_ioctl+0x39/0x40 [ 70.154735] do_vfs_ioctl+0xa4/0x630 [ 70.158322] ? syscall_trace_enter+0x1d3/0x2c0 [ 70.162769] ksys_ioctl+0x60/0x90 [ 70.166104] __x64_sys_ioctl+0x16/0x20 [ 70.169859] do_syscall_64+0x5b/0x1b0 [ 70.173532] entry_SYSCALL_64_after_hwframe+0x65/0xca [ 70.178587] RIP: 0033:0x7f90a1b3445b [ 70.182183] Code: 0f 1e fa 48 8b 05 2d aa 2c 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 0f 1f 44 00 00 f3 0f 1e fa b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d fd a9 2c 00 f7 d8 64 89 01 48 [ 70.200956] RSP: 002b:00007fffdca88b68 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [ 70.208535] RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f90a1b3445b [ 70.215684] RDX: 00007fffdca88b84 RSI: 0000000000002284 RDI: 0000000000000003 [ 70.222833] RBP: 00007fffdca88ca8 R08: 00007fffdca88b84 R09: 0000000000000000 [ 70.229981] R10: 0000000000000000 R11: 0000000000000246 R12: 00007fffdca88b84 [ 70.237131] R13: 0000000000000000 R14: 000055ab09b0bd28 R15: 0000000000000000 [ 70.244284] Modules linked in: nft_chain_route_ipv4 xt_CHECKSUM nft_chain_nat_ipv4 ipt_MASQUERADE nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack libcrc32c ipt_REJECT nf_reject_ipv4 nft_counter nft_compat tun bridge stp llc nf_tables nfnetli nk devlink sunrpc vfat fat intel_rapl intel_pmc_core x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm wmi_bmof iTCO_wdt iTCO_ vendor_support irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel ipmi_ssif intel_cstate intel_uncore intel_rapl_perf ipmi_si jo ydev pcspkr ipmi_devintf sg wmi ipmi_msghandler video acpi_power_meter acpi_pad mei_me i2c_i801 mei ip_tables ext4 mbcache jbd2 sr_mod cd rom sd_mod qla2xxx ast i2c_algo_bit drm_kms_helper nvme_fc syscopyarea sysfillrect uas sysimgblt fb_sys_fops nvme_fabrics ttm [ 70.314805] usb_storage nvme_core crc32c_intel scsi_transport_fc ahci drm libahci tg3 libata megaraid_sas pinctrl_cannonlake pinctrl_ intel [ 70.327335] CR2: 0000000000000040 Fixes: 9cf2bab630765 ("block: kill request ->cpu member") Signed-off-by: Himanshu Madhani Signed-off-by: Martin K. Petersen --- drivers/scsi/qla2xxx/qla_os.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index 677f82fdf56f..91f576d743fe 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -1517,7 +1517,7 @@ __qla2xxx_eh_generic_reset(char *name, enum nexus_wait_type type, goto eh_reset_failed; } err = 2; - if (do_reset(fcport, cmd->device->lun, blk_mq_rq_cpu(cmd->request) + 1) + if (do_reset(fcport, cmd->device->lun, 1) != QLA_SUCCESS) { ql_log(ql_log_warn, vha, 0x800c, "do_reset failed for cmd=%p.\n", cmd); -- cgit From 39bbd3310ec304a7b695240c6791893a88ec9729 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 13 Mar 2019 09:36:52 +0100 Subject: drm/amdgpu: revert "cleanup setting bulk_movable" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 8466cc61da89d33441e0d7a98de1ba98697cd465. It can trigger a reference counter bug in TTM. Need to investigate further, but for now revert the offending change. Signed-off-by: Christian König Reviewed-by: Michel Dänzer Tested-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index bfa9062ce6b9..16fcb56c232b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -700,6 +700,8 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_vm_bo_base *bo_base, *tmp; int r = 0; + vm->bulk_moveable &= list_empty(&vm->evicted); + list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) { struct amdgpu_bo *bo = bo_base->bo; -- cgit From 72464382fc2d3673eb51f21a57f2c0a320c1552f Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 18 Mar 2019 11:09:54 +0100 Subject: drm/amdgpu: fix invalid use of change_bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We only need to clear the bit in a 32bit integer. This fixes a crah on ARM64 and PPC64LE caused by "drm/amdgpu: update the vm invalidation engine layout V2" Signed-off-by: Christian König Acked-by: Alex Deucher Cc: stable@vger.kernel.org Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 600259b4e291..2fe8397241ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -742,7 +742,7 @@ static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev) } ring->vm_inv_eng = inv_eng - 1; - change_bit(inv_eng - 1, (unsigned long *)(&vm_inv_engs[vmhub])); + vm_inv_engs[vmhub] &= ~(1 << ring->vm_inv_eng); dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n", ring->name, ring->vm_inv_eng, ring->funcs->vmhub); -- cgit From c38dab7df7ee4fdecd51e390d4d33d5ef5cff49d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Mar 2019 15:49:56 -0700 Subject: perf record: Clarify help for --switch-output The help description for --switch-output looks like there are multiple comma separated fields. But it's actually a choice of different options. Make it clear and less confusing. Before: % perf record -h ... --switch-output[=] Switch output when receive SIGUSR2 or cross size,time threshold After: % perf record -h ... --switch-output[=] Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold Signed-off-by: Andi Kleen Acked-by: Jiri Olsa LPU-Reference: 20190314225002.30108-4-andi@firstfloor.org Link: https://lkml.kernel.org/n/tip-9yecyuha04nyg8toyd1b2pgi@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 02d7c40b2d10..e7144a1c1c82 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1989,8 +1989,8 @@ static struct option __record_options[] = { OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, "Record timestamp boundary (time of first/last samples)"), OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, - &record.switch_output.set, "signal,size,time", - "Switch output when receive SIGUSR2 or cross size,time threshold", + &record.switch_output.set, "signal or size[BKMG] or time[smhd]", + "Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold", "signal"), OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files, "Limit number of switch output generated files"), -- cgit From 702fb9b415e7c99bd671fc0d1da26574c125471a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Mar 2019 15:49:57 -0700 Subject: perf report: Show all sort keys in help output Show all the supported sort keys in the command line help output, so that it's not needed to refer to the manpage. Before: % perf report -h ... -s, --sort sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ... Please refer the man page for the complete list. After: % perf report -h ... -s, --sort sort by key(s): overhead overhead_sys overhead_us overhead_guest_sys overhead_guest_us overhead_children sample period pid comm dso symbol parent cpu ... Signed-off-by: Andi Kleen Acked-by: Jiri Olsa LPU-Reference: 20190314225002.30108-5-andi@firstfloor.org Link: https://lkml.kernel.org/n/tip-9r3uz2ch4izoi1uln3f889co@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 5 ++--- tools/perf/util/sort.c | 52 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/sort.h | 2 ++ 3 files changed, 56 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 1921aaa9cece..4054eb1f98ac 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1083,10 +1083,9 @@ int cmd_report(int argc, const char **argv) OPT_BOOLEAN(0, "header-only", &report.header_only, "Show only data header."), OPT_STRING('s', "sort", &sort_order, "key[,key2...]", - "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..." - " Please refer the man page for the complete list."), + sort_help("sort by key(s):")), OPT_STRING('F', "fields", &field_order, "key[,keys...]", - "output field(s): overhead, period, sample plus all of sort keys"), + sort_help("output field(s): overhead period sample ")), OPT_BOOLEAN(0, "show-cpu-utilization", &symbol_conf.show_cpu_utilization, "Show sample percentage for different cpu modes"), OPT_BOOLEAN_FLAG(0, "showcpuutilization", &symbol_conf.show_cpu_utilization, diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index bdd30cab51cb..5d2518e89fc4 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -13,6 +13,7 @@ #include "evsel.h" #include "evlist.h" #include "strlist.h" +#include "strbuf.h" #include #include "mem-events.h" #include "annotate.h" @@ -3107,3 +3108,54 @@ void reset_output_field(void) reset_dimensions(); perf_hpp__reset_output_field(&perf_hpp_list); } + +#define INDENT (3*8 + 1) + +static void add_key(struct strbuf *sb, const char *str, int *llen) +{ + if (*llen >= 75) { + strbuf_addstr(sb, "\n\t\t\t "); + *llen = INDENT; + } + strbuf_addf(sb, " %s", str); + *llen += strlen(str) + 1; +} + +static void add_sort_string(struct strbuf *sb, struct sort_dimension *s, int n, + int *llen) +{ + int i; + + for (i = 0; i < n; i++) + add_key(sb, s[i].name, llen); +} + +static void add_hpp_sort_string(struct strbuf *sb, struct hpp_dimension *s, int n, + int *llen) +{ + int i; + + for (i = 0; i < n; i++) + add_key(sb, s[i].name, llen); +} + +const char *sort_help(const char *prefix) +{ + struct strbuf sb; + char *s; + int len = strlen(prefix) + INDENT; + + strbuf_init(&sb, 300); + strbuf_addstr(&sb, prefix); + add_hpp_sort_string(&sb, hpp_sort_dimensions, + ARRAY_SIZE(hpp_sort_dimensions), &len); + add_sort_string(&sb, common_sort_dimensions, + ARRAY_SIZE(common_sort_dimensions), &len); + add_sort_string(&sb, bstack_sort_dimensions, + ARRAY_SIZE(bstack_sort_dimensions), &len); + add_sort_string(&sb, memory_sort_dimensions, + ARRAY_SIZE(memory_sort_dimensions), &len); + s = strbuf_detach(&sb, NULL); + strbuf_release(&sb); + return s; +} diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index bb9442ab7a0c..ce376a73f964 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -296,6 +296,8 @@ void reset_output_field(void); void sort__setup_elide(FILE *fp); void perf_hpp__set_elide(int idx, bool elide); +const char *sort_help(const char *prefix); + int report_parse_ignore_callees_opt(const struct option *opt, const char *arg, int unset); bool is_strict_order(const char *order); -- cgit From a4e7e6efabc57e85e0737c2eaa391525c0ae36f3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Mar 2019 15:49:59 -0700 Subject: perf report: Indicate JITed code better in report Print [TID] tid %d instead of the crypted /tmp/perf-%d.map default. % cat >loop.java public class loop { public static void main(String[] args) { for (;;); } } ^D % javac loop.java % perf record java loop ^C Before: % perf report --stdio ... 56.09% java perf-34724.map [.] 0x00007fd5bd021896 19.12% java perf-34724.map [.] 0x00007fd5bd021887 9.79% java perf-34724.map [.] 0x00007fd5bd021783 8.97% java perf-34724.map [.] 0x00007fd5bd02175b After: % perf report --stdio ... 56.09% java [JIT] tid 34724 [.] 0x00007fd5bd021896 19.12% java [JIT] tid 34724 [.] 0x00007fd5bd021887 9.79% java [JIT] tid 34724 [.] 0x00007fd5bd021783 8.97% java [JIT] tid 34724 [.] 0x00007fd5bd02175b Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo LPU-Reference: 20190314225002.30108-7-andi@firstfloor.org Link: https://lkml.kernel.org/n/tip-r17l6py9g0sezb7mi1f286gt@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index ba58ba603b69..ab8a455d2283 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1141,28 +1141,34 @@ void dso__set_short_name(struct dso *dso, const char *name, bool name_allocated) static void dso__set_basename(struct dso *dso) { - /* - * basename() may modify path buffer, so we must pass - * a copy. - */ - char *base, *lname = strdup(dso->long_name); + char *base, *lname; + int tid; - if (!lname) - return; - - /* - * basename() may return a pointer to internal - * storage which is reused in subsequent calls - * so copy the result. - */ - base = strdup(basename(lname)); + if (sscanf(dso->long_name, "/tmp/perf-%d.map", &tid) == 1) { + if (asprintf(&base, "[JIT] tid %d", tid) < 0) + return; + } else { + /* + * basename() may modify path buffer, so we must pass + * a copy. + */ + lname = strdup(dso->long_name); + if (!lname) + return; - free(lname); + /* + * basename() may return a pointer to internal + * storage which is reused in subsequent calls + * so copy the result. + */ + base = strdup(basename(lname)); - if (!base) - return; + free(lname); - dso__set_short_name(dso, base, true); + if (!base) + return; + } + dso__set_short_name(dso, base, true); } int dso__name_len(const struct dso *dso) -- cgit From 90b10f47c0ee2a70bd036d9da5e810f522b54a8f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Mar 2019 15:50:00 -0700 Subject: perf script: Support relative time When comparing time stamps in 'perf script' traces it can be annoying to work with the full perf time stamps. Add a --reltime option that displays time stamps relative to the trace start to make it easier to read the traces. Note: not currently supported for --time. Report an error in this case. Before: % perf script swapper 0 [000] 245402.891216: 1 cycles:ppp: ffffffffa0068814 native_write_msr+0x4 ([kernel.kallsyms]) swapper 0 [000] 245402.891223: 1 cycles:ppp: ffffffffa0068814 native_write_msr+0x4 ([kernel.kallsyms]) swapper 0 [000] 245402.891227: 5 cycles:ppp: ffffffffa0068814 native_write_msr+0x4 ([kernel.kallsyms]) swapper 0 [000] 245402.891231: 41 cycles:ppp: ffffffffa0068816 native_write_msr+0x6 ([kernel.kallsyms]) swapper 0 [000] 245402.891235: 355 cycles:ppp: ffffffffa000dd51 intel_bts_enable_local+0x21 ([kernel.kallsyms]) swapper 0 [000] 245402.891239: 3084 cycles:ppp: ffffffffa0a0150a end_repeat_nmi+0x48 ([kernel.kallsyms]) After: % perf script --reltime swapper 0 [000] 0.000000: 1 cycles:ppp: ffffffffa0068814 native_write_msr+0x4 ([kernel.kallsyms]) swapper 0 [000] 0.000006: 1 cycles:ppp: ffffffffa0068814 native_write_msr+0x4 ([kernel.kallsyms]) swapper 0 [000] 0.000010: 5 cycles:ppp: ffffffffa0068814 native_write_msr+0x4 ([kernel.kallsyms]) swapper 0 [000] 0.000014: 41 cycles:ppp: ffffffffa0068816 native_write_msr+0x6 ([kernel.kallsyms]) swapper 0 [000] 0.000018: 355 cycles:ppp: ffffffffa000dd51 intel_bts_enable_local+0x21 ([kernel.kallsyms]) swapper 0 [000] 0.000022: 3084 cycles:ppp: ffffffffa0a0150a end_repeat_nmi+0x48 ([kernel.kallsyms]) Committer notes: Do not use 'time' as the name of a variable, as this breaks the build on older glibcs: cc1: warnings being treated as errors builtin-script.c: In function 'perf_sample__fprintf_start': builtin-script.c:691: warning: declaration of 'time' shadows a global declaration /usr/include/time.h:187: warning: shadowed declaration is here Signed-off-by: Andi Kleen Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa LPU-Reference: 20190314225002.30108-8-andi@firstfloor.org Link: https://lkml.kernel.org/n/tip-bpahyi6pr9r399mvihu65fvc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 3 +++ tools/perf/builtin-script.c | 18 ++++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 2e19fd7ffe35..9b0d04dd2a61 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -380,6 +380,9 @@ include::itrace.txt[] Set the maximum number of program blocks to print with brstackasm for each sample. +--reltime:: + Print time stamps relative to trace start. + --per-event-dump:: Create per event files with a "perf.data.EVENT.dump" name instead of printing to stdout, useful, for instance, for generating flamegraphs. diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 2f93d60c5a17..61cfd8f70989 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -53,6 +53,8 @@ static char const *script_name; static char const *generate_script_lang; +static bool reltime; +static u64 initial_time; static bool debug_mode; static u64 last_timestamp; static u64 nr_unordered; @@ -686,7 +688,13 @@ static int perf_sample__fprintf_start(struct perf_sample *sample, } if (PRINT_FIELD(TIME)) { - nsecs = sample->time; + u64 t = sample->time; + if (reltime) { + if (!initial_time) + initial_time = sample->time; + t = sample->time - initial_time; + } + nsecs = t; secs = nsecs / NSEC_PER_SEC; nsecs -= secs * NSEC_PER_SEC; @@ -694,7 +702,7 @@ static int perf_sample__fprintf_start(struct perf_sample *sample, printed += fprintf(fp, "%5lu.%09llu: ", secs, nsecs); else { char sample_time[32]; - timestamp__scnprintf_usec(sample->time, sample_time, sizeof(sample_time)); + timestamp__scnprintf_usec(t, sample_time, sizeof(sample_time)); printed += fprintf(fp, "%12s: ", sample_time); } } @@ -3413,6 +3421,7 @@ int cmd_script(int argc, const char **argv) "Set the maximum stack depth when parsing the callchain, " "anything beyond the specified depth will be ignored. " "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), + OPT_BOOLEAN(0, "reltime", &reltime, "Show time stamps relative to start"), OPT_BOOLEAN('I', "show-info", &show_full_info, "display extended information from perf.data file"), OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path, @@ -3487,6 +3496,11 @@ int cmd_script(int argc, const char **argv) } } + if (script.time_str && reltime) { + fprintf(stderr, "Don't combine --reltime with --time\n"); + return -1; + } + if (itrace_synth_opts.callchain && itrace_synth_opts.callchain_sz > scripting_max_stack) scripting_max_stack = itrace_synth_opts.callchain_sz; -- cgit From 75998bb263bf48c1c85d78cd2d2f3a97d3747cab Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Mar 2019 15:50:01 -0700 Subject: perf stat: Fix --no-scale The -c option to enable multiplex scaling has been useless for quite some time because scaling is default. It's only useful as --no-scale to disable scaling. But the non scaling code path has bitrotted and doesn't print anything because perf output code relies on value run/ena information. Also even when we don't want to scale a value it's still useful to show its multiplex percentage. This patch: - Fixes help and documentation to show --no-scale instead of -c - Removes -c, only keeps the long option because -c doesn't support negatives. - Enables running/enabled even with --no-scale - And fixes some other problems in the no-scale output. Before: $ perf stat --no-scale -e cycles true Performance counter stats for 'true': cycles 0.000984154 seconds time elapsed After: $ ./perf stat --no-scale -e cycles true Performance counter stats for 'true': 706,070 cycles 0.001219821 seconds time elapsed Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo LPU-Reference: 20190314225002.30108-9-andi@firstfloor.org Link: https://lkml.kernel.org/n/tip-xggjvwcdaj2aqy8ib3i4b1g6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-stat.txt | 5 ++--- tools/perf/builtin-stat.c | 3 ++- tools/perf/util/evsel.c | 3 +-- tools/perf/util/stat.c | 12 ++++-------- 4 files changed, 9 insertions(+), 14 deletions(-) diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 4bc2085e5197..39c05f89104e 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -72,9 +72,8 @@ report:: --all-cpus:: system-wide collection from all CPUs (default if no target is specified) --c:: ---scale:: - scale/normalize counter values +--no-scale:: + Don't scale/normalize counter values -d:: --detailed:: diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 7b8f09b0b8bf..49ee3c2033ec 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -718,7 +718,8 @@ static struct option stat_options[] = { "system-wide collection from all CPUs"), OPT_BOOLEAN('g', "group", &group, "put the counters into a counter group"), - OPT_BOOLEAN('c', "scale", &stat_config.scale, "scale/normalize counters"), + OPT_BOOLEAN(0, "scale", &stat_config.scale, + "Use --no-scale to disable counter scaling for multiplexing"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), OPT_INTEGER('r', "repeat", &stat_config.run_count, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 3bbf73e979c0..53ec40cacd4b 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1344,8 +1344,7 @@ void perf_counts_values__scale(struct perf_counts_values *count, scaled = 1; count->val = (u64)((double) count->val * count->ena / count->run + 0.5); } - } else - count->ena = count->run = 0; + } if (pscaled) *pscaled = scaled; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 4d40515307b8..2856cc9d5a31 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -291,10 +291,8 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel break; case AGGR_GLOBAL: aggr->val += count->val; - if (config->scale) { - aggr->ena += count->ena; - aggr->run += count->run; - } + aggr->ena += count->ena; + aggr->run += count->run; case AGGR_UNSET: default: break; @@ -442,10 +440,8 @@ int create_perf_stat_counter(struct perf_evsel *evsel, struct perf_event_attr *attr = &evsel->attr; struct perf_evsel *leader = evsel->leader; - if (config->scale) { - attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | - PERF_FORMAT_TOTAL_TIME_RUNNING; - } + attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING; /* * The event is part of non trivial group, let's enable -- cgit From 42a5864cf0a9a7e4eb541b5f390749a69c288c80 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 14 Mar 2019 15:50:02 -0700 Subject: perf stat: Improve scaling The multiplexing scaling in perf stat mysteriously adds 0.5 to the value. This dates back to the original perf tool. Other scaling code doesn't use that strange convention. Remove the extra 0.5. Before: $ perf stat -e 'cycles,cycles,cycles,cycles,cycles,cycles' grep -rq foo Performance counter stats for 'grep -rq foo': 6,403,580 cycles (81.62%) 6,404,341 cycles (81.64%) 6,402,983 cycles (81.62%) 6,399,941 cycles (81.63%) 6,399,451 cycles (81.62%) 6,436,105 cycles (91.87%) 0.005843799 seconds time elapsed 0.002905000 seconds user 0.002902000 seconds sys After: $ perf stat -e 'cycles,cycles,cycles,cycles,cycles,cycles' grep -rq foo Performance counter stats for 'grep -rq foo': 6,422,704 cycles (81.68%) 6,401,842 cycles (81.68%) 6,398,432 cycles (81.68%) 6,397,098 cycles (81.68%) 6,396,074 cycles (81.67%) 6,434,980 cycles (91.62%) 0.005884437 seconds time elapsed 0.003580000 seconds user 0.002356000 seconds sys Signed-off-by: Andi Kleen Acked-by: Jiri Olsa LPU-Reference: 20190314225002.30108-10-andi@firstfloor.org Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 53ec40cacd4b..16f02b83b23f 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1342,7 +1342,7 @@ void perf_counts_values__scale(struct perf_counts_values *count, count->val = 0; } else if (count->run < count->ena) { scaled = 1; - count->val = (u64)((double) count->val * count->ena / count->run + 0.5); + count->val = (u64)((double) count->val * count->ena / count->run); } } -- cgit From c3b4d5c4afb0856e62a60db04c71205712b0d24f Mon Sep 17 00:00:00 2001 From: Mamatha Inamdar Date: Thu, 7 Feb 2019 15:09:28 +0530 Subject: perf vendor events: Remove P8 HW events which are not supported This patch is to remove following hardware events from JSON file which are not supported on POWER8. pm_co_disp_fail pm_co_tm_sc_footprint pm_iside_disp pm_iside_disp_fail pm_iside_disp_fail_other pm_iside_mru_touch pm_l2_castout_mod pm_l2_castout_shr pm_l2_dc_inv pm_l2_disp_all_l2miss pm_l2_grp_guess_correct pm_l2_grp_guess_wrong pm_l2_ic_inv pm_l2_inst pm_l2_inst_miss pm_l2_ld pm_l2_ld_disp pm_l2_ld_hit pm_l2_ld_miss pm_l2_loc_guess_correct pm_l2_loc_guess_wrong pm_l2_rcld_disp pm_l2_rcld_disp_fail_addr pm_l2_rcld_disp_fail_other pm_l2_rcst_disp pm_l2_rcst_disp_fail_addr pm_l2_rcst_disp_fail_other pm_l2_rc_st_done pm_l2_rty_ld pm_l2_sn_m_rd_done pm_l2_sn_m_wr_done pm_l2_sn_sx_i_done pm_l2_st_disp pm_l2_st_hit pm_l2_sys_guess_correct pm_l2_sys_guess_wrong pm_l2_sys_pump pm_l3_ci_hit pm_l3_ci_miss pm_l3_cinj pm_l3_co pm_l3_co_lco pm_l3_grp_guess_correct pm_l3_grp_guess_wrong_high pm_l3_grp_guess_wrong_low pm_l3_hit pm_l3_l2_co_hit pm_l3_l2_co_miss pm_l3_lat_ci_hit pm_l3_lat_ci_miss pm_l3_ld_hit pm_l3_ld_miss pm_l3_loc_guess_correct pm_l3_loc_guess_wrong pm_l3_miss pm_l3_p0_co_l31 pm_l3_p0_co_mem pm_l3_p0_co_rty pm_l3_p0_grp_pump pm_l3_p0_lco_data pm_l3_p0_lco_no_data pm_l3_p0_lco_rty pm_l3_p0_node_pump pm_l3_p0_pf_rty pm_l3_p0_sn_hit pm_l3_p0_sn_inv pm_l3_p0_sn_miss pm_l3_p0_sys_pump pm_l3_p1_co_l31 pm_l3_p1_co_mem pm_l3_p1_co_rty pm_l3_p1_grp_pump pm_l3_p1_lco_data pm_l3_p1_lco_no_data pm_l3_p1_lco_rty pm_l3_p1_node_pump pm_l3_p1_pf_rty pm_l3_p1_sn_hit pm_l3_p1_sn_inv pm_l3_p1_sn_miss pm_l3_p1_sys_pump pm_l3_pf_hit_l3 pm_l3_sys_guess_correct pm_l3_sys_guess_wrong pm_l3_trans_pf pm_l3_wi0_busy pm_l3_wi_usage pm_non_tm_rst_sc pm_rd_clearing_sc pm_rd_forming_sc pm_rd_hit_pf pm_snp_tm_hit_m pm_snp_tm_hit_t pm_st_caused_fail pm_tm_cam_overflow pm_tm_cap_overflow pm_tm_fav_caused_fail pm_tm_ld_caused_fail pm_tm_ld_conf pm_tm_rst_sc pm_tm_sc_co pm_tm_st_caused_fail pm_tm_st_conf Signed-off-by: Mamatha Inamdar Acked-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Sukadev Bhattiprolu Fixes: 2a81fa3bb5ed ("perf vendor events: Add power8 PMU events") Link: http://lkml.kernel.org/r/154953186583.11022.14819560028300370163.stgit@localhost.localdomain Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/pmu-events/arch/powerpc/power8/other.json | 594 --------------------- 1 file changed, 594 deletions(-) diff --git a/tools/perf/pmu-events/arch/powerpc/power8/other.json b/tools/perf/pmu-events/arch/powerpc/power8/other.json index 704302c3e67d..9dc2f6b70354 100644 --- a/tools/perf/pmu-events/arch/powerpc/power8/other.json +++ b/tools/perf/pmu-events/arch/powerpc/power8/other.json @@ -347,18 +347,6 @@ "BriefDescription": "CO mach 0 Busy. Used by PMU to sample ave RC livetime(mach0 used as sample point)", "PublicDescription": "" }, - {, - "EventCode": "0x517082", - "EventName": "PM_CO_DISP_FAIL", - "BriefDescription": "CO dispatch failed due to all CO machines being busy", - "PublicDescription": "" - }, - {, - "EventCode": "0x527084", - "EventName": "PM_CO_TM_SC_FOOTPRINT", - "BriefDescription": "L2 did a cleanifdirty CO to the L3 (ie created an SC line in the L3)", - "PublicDescription": "" - }, {, "EventCode": "0x3608a", "EventName": "PM_CO_USAGE", @@ -1577,36 +1565,12 @@ "BriefDescription": "A Page Table Entry was loaded into the TLB with Shared (S) data from another core's L3 on the same chip due to a instruction side request", "PublicDescription": "" }, - {, - "EventCode": "0x617082", - "EventName": "PM_ISIDE_DISP", - "BriefDescription": "All i-side dispatch attempts", - "PublicDescription": "" - }, - {, - "EventCode": "0x627084", - "EventName": "PM_ISIDE_DISP_FAIL", - "BriefDescription": "All i-side dispatch attempts that failed due to a addr collision with another machine", - "PublicDescription": "" - }, - {, - "EventCode": "0x627086", - "EventName": "PM_ISIDE_DISP_FAIL_OTHER", - "BriefDescription": "All i-side dispatch attempts that failed due to a reason other than addrs collision", - "PublicDescription": "" - }, {, "EventCode": "0x4608e", "EventName": "PM_ISIDE_L2MEMACC", "BriefDescription": "valid when first beat of data comes in for an i-side fetch where data came from mem(or L4)", "PublicDescription": "" }, - {, - "EventCode": "0x44608e", - "EventName": "PM_ISIDE_MRU_TOUCH", - "BriefDescription": "Iside L2 MRU touch", - "PublicDescription": "" - }, {, "EventCode": "0x30ac", "EventName": "PM_ISU_REF_FX0", @@ -1733,222 +1697,36 @@ "BriefDescription": "Instruction Demand sectors wriittent into IL1", "PublicDescription": "" }, - {, - "EventCode": "0x417080", - "EventName": "PM_L2_CASTOUT_MOD", - "BriefDescription": "L2 Castouts - Modified (M, Mu, Me)", - "PublicDescription": "" - }, - {, - "EventCode": "0x417082", - "EventName": "PM_L2_CASTOUT_SHR", - "BriefDescription": "L2 Castouts - Shared (T, Te, Si, S)", - "PublicDescription": "" - }, {, "EventCode": "0x27084", "EventName": "PM_L2_CHIP_PUMP", "BriefDescription": "RC requests that were local on chip pump attempts", "PublicDescription": "" }, - {, - "EventCode": "0x427086", - "EventName": "PM_L2_DC_INV", - "BriefDescription": "Dcache invalidates from L2", - "PublicDescription": "" - }, - {, - "EventCode": "0x44608c", - "EventName": "PM_L2_DISP_ALL_L2MISS", - "BriefDescription": "All successful Ld/St dispatches for this thread that were an L2miss", - "PublicDescription": "" - }, {, "EventCode": "0x27086", "EventName": "PM_L2_GROUP_PUMP", "BriefDescription": "RC requests that were on Node Pump attempts", "PublicDescription": "" }, - {, - "EventCode": "0x626084", - "EventName": "PM_L2_GRP_GUESS_CORRECT", - "BriefDescription": "L2 guess grp and guess was correct (data intra-6chip AND ^on-chip)", - "PublicDescription": "" - }, - {, - "EventCode": "0x626086", - "EventName": "PM_L2_GRP_GUESS_WRONG", - "BriefDescription": "L2 guess grp and guess was not correct (ie data on-chip OR beyond-6chip)", - "PublicDescription": "" - }, - {, - "EventCode": "0x427084", - "EventName": "PM_L2_IC_INV", - "BriefDescription": "Icache Invalidates from L2", - "PublicDescription": "" - }, - {, - "EventCode": "0x436088", - "EventName": "PM_L2_INST", - "BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)", - "PublicDescription": "" - }, - {, - "EventCode": "0x43608a", - "EventName": "PM_L2_INST_MISS", - "BriefDescription": "All successful i-side dispatches that were an L2miss for this thread (excludes i_l2mru_tch reqs)", - "PublicDescription": "" - }, - {, - "EventCode": "0x416080", - "EventName": "PM_L2_LD", - "BriefDescription": "All successful D-side Load dispatches for this thread", - "PublicDescription": "" - }, - {, - "EventCode": "0x437088", - "EventName": "PM_L2_LD_DISP", - "BriefDescription": "All successful load dispatches", - "PublicDescription": "" - }, - {, - "EventCode": "0x43708a", - "EventName": "PM_L2_LD_HIT", - "BriefDescription": "All successful load dispatches that were L2 hits", - "PublicDescription": "" - }, - {, - "EventCode": "0x426084", - "EventName": "PM_L2_LD_MISS", - "BriefDescription": "All successful D-Side Load dispatches that were an L2miss for this thread", - "PublicDescription": "" - }, - {, - "EventCode": "0x616080", - "EventName": "PM_L2_LOC_GUESS_CORRECT", - "BriefDescription": "L2 guess loc and guess was correct (ie data local)", - "PublicDescription": "" - }, - {, - "EventCode": "0x616082", - "EventName": "PM_L2_LOC_GUESS_WRONG", - "BriefDescription": "L2 guess loc and guess was not correct (ie data not on chip)", - "PublicDescription": "" - }, - {, - "EventCode": "0x516080", - "EventName": "PM_L2_RCLD_DISP", - "BriefDescription": "L2 RC load dispatch attempt", - "PublicDescription": "" - }, - {, - "EventCode": "0x516082", - "EventName": "PM_L2_RCLD_DISP_FAIL_ADDR", - "BriefDescription": "L2 RC load dispatch attempt failed due to address collision with RC/CO/SN/SQ", - "PublicDescription": "" - }, - {, - "EventCode": "0x526084", - "EventName": "PM_L2_RCLD_DISP_FAIL_OTHER", - "BriefDescription": "L2 RC load dispatch attempt failed due to other reasons", - "PublicDescription": "" - }, - {, - "EventCode": "0x536088", - "EventName": "PM_L2_RCST_DISP", - "BriefDescription": "L2 RC store dispatch attempt", - "PublicDescription": "" - }, - {, - "EventCode": "0x53608a", - "EventName": "PM_L2_RCST_DISP_FAIL_ADDR", - "BriefDescription": "L2 RC store dispatch attempt failed due to address collision with RC/CO/SN/SQ", - "PublicDescription": "" - }, - {, - "EventCode": "0x54608c", - "EventName": "PM_L2_RCST_DISP_FAIL_OTHER", - "BriefDescription": "L2 RC store dispatch attempt failed due to other reasons", - "PublicDescription": "" - }, - {, - "EventCode": "0x537088", - "EventName": "PM_L2_RC_ST_DONE", - "BriefDescription": "RC did st to line that was Tx or Sx", - "PublicDescription": "" - }, - {, - "EventCode": "0x63708a", - "EventName": "PM_L2_RTY_LD", - "BriefDescription": "RC retries on PB for any load from core", - "PublicDescription": "" - }, {, "EventCode": "0x3708a", "EventName": "PM_L2_RTY_ST", "BriefDescription": "RC retries on PB for any store from core", "PublicDescription": "" }, - {, - "EventCode": "0x54708c", - "EventName": "PM_L2_SN_M_RD_DONE", - "BriefDescription": "SNP dispatched for a read and was M", - "PublicDescription": "" - }, - {, - "EventCode": "0x54708e", - "EventName": "PM_L2_SN_M_WR_DONE", - "BriefDescription": "SNP dispatched for a write and was M", - "PublicDescription": "" - }, - {, - "EventCode": "0x53708a", - "EventName": "PM_L2_SN_SX_I_DONE", - "BriefDescription": "SNP dispatched and went from Sx or Tx to Ix", - "PublicDescription": "" - }, {, "EventCode": "0x17080", "EventName": "PM_L2_ST", "BriefDescription": "All successful D-side store dispatches for this thread", "PublicDescription": "" }, - {, - "EventCode": "0x44708c", - "EventName": "PM_L2_ST_DISP", - "BriefDescription": "All successful store dispatches", - "PublicDescription": "" - }, - {, - "EventCode": "0x44708e", - "EventName": "PM_L2_ST_HIT", - "BriefDescription": "All successful store dispatches that were L2Hits", - "PublicDescription": "" - }, {, "EventCode": "0x17082", "EventName": "PM_L2_ST_MISS", "BriefDescription": "All successful D-side store dispatches for this thread that were L2 Miss", "PublicDescription": "" }, - {, - "EventCode": "0x636088", - "EventName": "PM_L2_SYS_GUESS_CORRECT", - "BriefDescription": "L2 guess sys and guess was correct (ie data beyond-6chip)", - "PublicDescription": "" - }, - {, - "EventCode": "0x63608a", - "EventName": "PM_L2_SYS_GUESS_WRONG", - "BriefDescription": "L2 guess sys and guess was not correct (ie data ^beyond-6chip)", - "PublicDescription": "" - }, - {, - "EventCode": "0x617080", - "EventName": "PM_L2_SYS_PUMP", - "BriefDescription": "RC requests that were system pump attempts", - "PublicDescription": "" - }, {, "EventCode": "0x1e05e", "EventName": "PM_L2_TM_REQ_ABORT", @@ -1961,36 +1739,12 @@ "BriefDescription": "TM marked store abort", "PublicDescription": "" }, - {, - "EventCode": "0x23808a", - "EventName": "PM_L3_CINJ", - "BriefDescription": "l3 ci of cache inject", - "PublicDescription": "" - }, - {, - "EventCode": "0x128084", - "EventName": "PM_L3_CI_HIT", - "BriefDescription": "L3 Castins Hit (total count", - "PublicDescription": "" - }, - {, - "EventCode": "0x128086", - "EventName": "PM_L3_CI_MISS", - "BriefDescription": "L3 castins miss (total count", - "PublicDescription": "" - }, {, "EventCode": "0x819082", "EventName": "PM_L3_CI_USAGE", "BriefDescription": "rotating sample of 16 CI or CO actives", "PublicDescription": "" }, - {, - "EventCode": "0x438088", - "EventName": "PM_L3_CO", - "BriefDescription": "l3 castout occurring ( does not include casthrough or log writes (cinj/dmaw)", - "PublicDescription": "" - }, {, "EventCode": "0x83908b", "EventName": "PM_L3_CO0_ALLOC", @@ -2009,120 +1763,18 @@ "BriefDescription": "L3 CO to L3.1 OR of port 0 and 1 ( lossy)", "PublicDescription": "" }, - {, - "EventCode": "0x238088", - "EventName": "PM_L3_CO_LCO", - "BriefDescription": "Total L3 castouts occurred on LCO", - "PublicDescription": "" - }, {, "EventCode": "0x28084", "EventName": "PM_L3_CO_MEM", "BriefDescription": "L3 CO to memory OR of port 0 and 1 ( lossy)", "PublicDescription": "" }, - {, - "EventCode": "0xb19082", - "EventName": "PM_L3_GRP_GUESS_CORRECT", - "BriefDescription": "Initial scope=group and data from same group (near) (pred successful)", - "PublicDescription": "" - }, - {, - "EventCode": "0xb3908a", - "EventName": "PM_L3_GRP_GUESS_WRONG_HIGH", - "BriefDescription": "Initial scope=group but data from local node. Predition too high", - "PublicDescription": "" - }, - {, - "EventCode": "0xb39088", - "EventName": "PM_L3_GRP_GUESS_WRONG_LOW", - "BriefDescription": "Initial scope=group but data from outside group (far or rem). Prediction too Low", - "PublicDescription": "" - }, - {, - "EventCode": "0x218080", - "EventName": "PM_L3_HIT", - "BriefDescription": "L3 Hits", - "PublicDescription": "" - }, - {, - "EventCode": "0x138088", - "EventName": "PM_L3_L2_CO_HIT", - "BriefDescription": "L2 castout hits", - "PublicDescription": "" - }, - {, - "EventCode": "0x13808a", - "EventName": "PM_L3_L2_CO_MISS", - "BriefDescription": "L2 castout miss", - "PublicDescription": "" - }, - {, - "EventCode": "0x14808c", - "EventName": "PM_L3_LAT_CI_HIT", - "BriefDescription": "L3 Lateral Castins Hit", - "PublicDescription": "" - }, - {, - "EventCode": "0x14808e", - "EventName": "PM_L3_LAT_CI_MISS", - "BriefDescription": "L3 Lateral Castins Miss", - "PublicDescription": "" - }, - {, - "EventCode": "0x228084", - "EventName": "PM_L3_LD_HIT", - "BriefDescription": "L3 demand LD Hits", - "PublicDescription": "" - }, - {, - "EventCode": "0x228086", - "EventName": "PM_L3_LD_MISS", - "BriefDescription": "L3 demand LD Miss", - "PublicDescription": "" - }, {, "EventCode": "0x1e052", "EventName": "PM_L3_LD_PREF", "BriefDescription": "L3 Load Prefetches", "PublicDescription": "" }, - {, - "EventCode": "0xb19080", - "EventName": "PM_L3_LOC_GUESS_CORRECT", - "BriefDescription": "initial scope=node/chip and data from local node (local) (pred successful)", - "PublicDescription": "" - }, - {, - "EventCode": "0xb29086", - "EventName": "PM_L3_LOC_GUESS_WRONG", - "BriefDescription": "Initial scope=node but data from out side local node (near or far or rem). Prediction too Low", - "PublicDescription": "" - }, - {, - "EventCode": "0x218082", - "EventName": "PM_L3_MISS", - "BriefDescription": "L3 Misses", - "PublicDescription": "" - }, - {, - "EventCode": "0x54808c", - "EventName": "PM_L3_P0_CO_L31", - "BriefDescription": "l3 CO to L3.1 (lco) port 0", - "PublicDescription": "" - }, - {, - "EventCode": "0x538088", - "EventName": "PM_L3_P0_CO_MEM", - "BriefDescription": "l3 CO to memory port 0", - "PublicDescription": "" - }, - {, - "EventCode": "0x929084", - "EventName": "PM_L3_P0_CO_RTY", - "BriefDescription": "L3 CO received retry port 0", - "PublicDescription": "" - }, {, "EventCode": "0xa29084", "EventName": "PM_L3_P0_GRP_PUMP", @@ -2147,120 +1799,6 @@ "BriefDescription": "L3 LCO received retry port 0", "PublicDescription": "" }, - {, - "EventCode": "0xa19080", - "EventName": "PM_L3_P0_NODE_PUMP", - "BriefDescription": "L3 pf sent with nodal scope port 0", - "PublicDescription": "" - }, - {, - "EventCode": "0x919080", - "EventName": "PM_L3_P0_PF_RTY", - "BriefDescription": "L3 PF received retry port 0", - "PublicDescription": "" - }, - {, - "EventCode": "0x939088", - "EventName": "PM_L3_P0_SN_HIT", - "BriefDescription": "L3 snoop hit port 0", - "PublicDescription": "" - }, - {, - "EventCode": "0x118080", - "EventName": "PM_L3_P0_SN_INV", - "BriefDescription": "Port0 snooper detects someone doing a store to a line thats Sx", - "PublicDescription": "" - }, - {, - "EventCode": "0x94908c", - "EventName": "PM_L3_P0_SN_MISS", - "BriefDescription": "L3 snoop miss port 0", - "PublicDescription": "" - }, - {, - "EventCode": "0xa39088", - "EventName": "PM_L3_P0_SYS_PUMP", - "BriefDescription": "L3 pf sent with sys scope port 0", - "PublicDescription": "" - }, - {, - "EventCode": "0x54808e", - "EventName": "PM_L3_P1_CO_L31", - "BriefDescription": "l3 CO to L3.1 (lco) port 1", - "PublicDescription": "" - }, - {, - "EventCode": "0x53808a", - "EventName": "PM_L3_P1_CO_MEM", - "BriefDescription": "l3 CO to memory port 1", - "PublicDescription": "" - }, - {, - "EventCode": "0x929086", - "EventName": "PM_L3_P1_CO_RTY", - "BriefDescription": "L3 CO received retry port 1", - "PublicDescription": "" - }, - {, - "EventCode": "0xa29086", - "EventName": "PM_L3_P1_GRP_PUMP", - "BriefDescription": "L3 pf sent with grp scope port 1", - "PublicDescription": "" - }, - {, - "EventCode": "0x528086", - "EventName": "PM_L3_P1_LCO_DATA", - "BriefDescription": "lco sent with data port 1", - "PublicDescription": "" - }, - {, - "EventCode": "0x518082", - "EventName": "PM_L3_P1_LCO_NO_DATA", - "BriefDescription": "dataless l3 lco sent port 1", - "PublicDescription": "" - }, - {, - "EventCode": "0xa4908e", - "EventName": "PM_L3_P1_LCO_RTY", - "BriefDescription": "L3 LCO received retry port 1", - "PublicDescription": "" - }, - {, - "EventCode": "0xa19082", - "EventName": "PM_L3_P1_NODE_PUMP", - "BriefDescription": "L3 pf sent with nodal scope port 1", - "PublicDescription": "" - }, - {, - "EventCode": "0x919082", - "EventName": "PM_L3_P1_PF_RTY", - "BriefDescription": "L3 PF received retry port 1", - "PublicDescription": "" - }, - {, - "EventCode": "0x93908a", - "EventName": "PM_L3_P1_SN_HIT", - "BriefDescription": "L3 snoop hit port 1", - "PublicDescription": "" - }, - {, - "EventCode": "0x118082", - "EventName": "PM_L3_P1_SN_INV", - "BriefDescription": "Port1 snooper detects someone doing a store to a line thats Sx", - "PublicDescription": "" - }, - {, - "EventCode": "0x94908e", - "EventName": "PM_L3_P1_SN_MISS", - "BriefDescription": "L3 snoop miss port 1", - "PublicDescription": "" - }, - {, - "EventCode": "0xa3908a", - "EventName": "PM_L3_P1_SYS_PUMP", - "BriefDescription": "L3 pf sent with sys scope port 1", - "PublicDescription": "" - }, {, "EventCode": "0x84908d", "EventName": "PM_L3_PF0_ALLOC", @@ -2273,12 +1811,6 @@ "BriefDescription": "lifetime, sample of PF machine 0 valid", "PublicDescription": "" }, - {, - "EventCode": "0x428084", - "EventName": "PM_L3_PF_HIT_L3", - "BriefDescription": "l3 pf hit in l3", - "PublicDescription": "" - }, {, "EventCode": "0x18080", "EventName": "PM_L3_PF_MISS_L3", @@ -2369,42 +1901,12 @@ "BriefDescription": "Data stream touchto L3", "PublicDescription": "" }, - {, - "EventCode": "0xb29084", - "EventName": "PM_L3_SYS_GUESS_CORRECT", - "BriefDescription": "Initial scope=system and data from outside group (far or rem)(pred successful)", - "PublicDescription": "" - }, - {, - "EventCode": "0xb4908c", - "EventName": "PM_L3_SYS_GUESS_WRONG", - "BriefDescription": "Initial scope=system but data from local or near. Predction too high", - "PublicDescription": "" - }, - {, - "EventCode": "0x24808e", - "EventName": "PM_L3_TRANS_PF", - "BriefDescription": "L3 Transient prefetch", - "PublicDescription": "" - }, {, "EventCode": "0x18081", "EventName": "PM_L3_WI0_ALLOC", "BriefDescription": "lifetime, sample of Write Inject machine 0 valid", "PublicDescription": "0.0" }, - {, - "EventCode": "0x418080", - "EventName": "PM_L3_WI0_BUSY", - "BriefDescription": "lifetime, sample of Write Inject machine 0 valid", - "PublicDescription": "" - }, - {, - "EventCode": "0x418082", - "EventName": "PM_L3_WI_USAGE", - "BriefDescription": "rotating sample of 8 WI actives", - "PublicDescription": "" - }, {, "EventCode": "0xc080", "EventName": "PM_LD_REF_L1_LSU0", @@ -3311,12 +2813,6 @@ "BriefDescription": "Dispatch time non favored tbegin", "PublicDescription": "" }, - {, - "EventCode": "0x328084", - "EventName": "PM_NON_TM_RST_SC", - "BriefDescription": "non tm snp rst tm sc", - "PublicDescription": "" - }, {, "EventCode": "0x2001a", "EventName": "PM_NTCG_ALL_FIN", @@ -3419,24 +2915,6 @@ "BriefDescription": "Continuous 16 cycle(2to1) window where this signals rotates thru sampling each L2 RC machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running", "PublicDescription": "" }, - {, - "EventCode": "0x34808e", - "EventName": "PM_RD_CLEARING_SC", - "BriefDescription": "rd clearing sc", - "PublicDescription": "" - }, - {, - "EventCode": "0x34808c", - "EventName": "PM_RD_FORMING_SC", - "BriefDescription": "rd forming sc", - "PublicDescription": "" - }, - {, - "EventCode": "0x428086", - "EventName": "PM_RD_HIT_PF", - "BriefDescription": "rd machine hit l3 pf machine", - "PublicDescription": "" - }, {, "EventCode": "0x20004", "EventName": "PM_REAL_SRQ_FULL", @@ -3503,18 +2981,6 @@ "BriefDescription": "TLBIE snoop", "PublicDescription": "TLBIE snoopSnoop TLBIE" }, - {, - "EventCode": "0x338088", - "EventName": "PM_SNP_TM_HIT_M", - "BriefDescription": "snp tm st hit m mu", - "PublicDescription": "" - }, - {, - "EventCode": "0x33808a", - "EventName": "PM_SNP_TM_HIT_T", - "BriefDescription": "snp tm_st_hit t tn te", - "PublicDescription": "" - }, {, "EventCode": "0x4608c", "EventName": "PM_SN_USAGE", @@ -3533,12 +2999,6 @@ "BriefDescription": "STCX executed reported at sent to nest", "PublicDescription": "STCX executed reported at sent to nest42" }, - {, - "EventCode": "0x717080", - "EventName": "PM_ST_CAUSED_FAIL", - "BriefDescription": "Non TM St caused any thread to fail", - "PublicDescription": "" - }, {, "EventCode": "0x3090", "EventName": "PM_SWAP_CANCEL", @@ -3623,18 +3083,6 @@ "BriefDescription": "Tm any tbegin", "PublicDescription": "" }, - {, - "EventCode": "0x318082", - "EventName": "PM_TM_CAM_OVERFLOW", - "BriefDescription": "l3 tm cam overflow during L2 co of SC", - "PublicDescription": "" - }, - {, - "EventCode": "0x74708c", - "EventName": "PM_TM_CAP_OVERFLOW", - "BriefDescription": "TM Footprint Capactiy Overflow", - "PublicDescription": "" - }, {, "EventCode": "0x20ba", "EventName": "PM_TM_END_ALL", @@ -3689,48 +3137,6 @@ "BriefDescription": "Transactional conflict from LSU, whatever gets reported to texas", "PublicDescription": "Transactional conflict from LSU, whatever gets reported to texas 42" }, - {, - "EventCode": "0x727086", - "EventName": "PM_TM_FAV_CAUSED_FAIL", - "BriefDescription": "TM Load (fav) caused another thread to fail", - "PublicDescription": "" - }, - {, - "EventCode": "0x717082", - "EventName": "PM_TM_LD_CAUSED_FAIL", - "BriefDescription": "Non TM Ld caused any thread to fail", - "PublicDescription": "" - }, - {, - "EventCode": "0x727084", - "EventName": "PM_TM_LD_CONF", - "BriefDescription": "TM Load (fav or non-fav) ran into conflict (failed)", - "PublicDescription": "" - }, - {, - "EventCode": "0x328086", - "EventName": "PM_TM_RST_SC", - "BriefDescription": "tm snp rst tm sc", - "PublicDescription": "" - }, - {, - "EventCode": "0x318080", - "EventName": "PM_TM_SC_CO", - "BriefDescription": "l3 castout tm Sc line", - "PublicDescription": "" - }, - {, - "EventCode": "0x73708a", - "EventName": "PM_TM_ST_CAUSED_FAIL", - "BriefDescription": "TM Store (fav or non-fav) caused another thread to fail", - "PublicDescription": "" - }, - {, - "EventCode": "0x737088", - "EventName": "PM_TM_ST_CONF", - "BriefDescription": "TM Store (fav or non-fav) ran into conflict (failed)", - "PublicDescription": "" - }, {, "EventCode": "0x20bc", "EventName": "PM_TM_TBEGIN", -- cgit From af7a14a750b8eb3cdb26ec15344616ca170b06f2 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:41 +0800 Subject: perf tools: Add doc about how to build perf with Asan and UBSan AddressSanitizer (or ASan) and UndefinedBehaviorSanitizer (or UBSan) are very useful tools to detect program bugs: - AddressSanitizer (or ASan) is a GCC feature that detects memory corruption bugs such as buffer overflows and memory leaks. - UndefinedBehaviorSanitizer (or UBSan) is a fast undefined behavior detector supported by GCC. UBSan detects undefined behaviors of programs at runtime. This patch adds a document about how to use them on perf. Later patches will fix some of the issues disclosed by them. Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lkml.kernel.org/r/20190316080556.3075-2-changbin.du@gmail.com [ Make some changes based on comments made by Jiri Olsa ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/Build.txt | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tools/perf/Documentation/Build.txt b/tools/perf/Documentation/Build.txt index f6fc6507ba55..3766886c4bca 100644 --- a/tools/perf/Documentation/Build.txt +++ b/tools/perf/Documentation/Build.txt @@ -47,3 +47,27 @@ Those objects are then used in final linking: NOTE this description is omitting other libraries involved, only focusing on build framework outcomes + +3) Build with ASan or UBSan +========================== + $ cd tools/perf + $ make DESTDIR=/usr + $ make DESTDIR=/usr install + +AddressSanitizer (or ASan) is a GCC feature that detects memory corruption bugs +such as buffer overflows and memory leaks. + + $ cd tools/perf + $ make DEBUG=1 EXTRA_CFLAGS='-fno-omit-frame-pointer -fsanitize=address' + $ ASAN_OPTIONS=log_path=asan.log ./perf record -a + +ASan outputs all detected issues into a log file named 'asan.log.'. + +UndefinedBehaviorSanitizer (or UBSan) is a fast undefined behavior detector +supported by GCC. UBSan detects undefined behaviors of programs at runtime. + + $ cd tools/perf + $ make DEBUG=1 EXTRA_CFLAGS='-fno-omit-frame-pointer -fsanitize=undefined' + $ UBSAN_OPTIONS=print_stacktrace=1 ./perf record -a + +If UBSan detects any problem at runtime, it outputs a “runtime error:” message. -- cgit From 39df730b09774bd860e39ea208a48d15078236cb Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:42 +0800 Subject: perf list: Don't forget to drop the reference to the allocated thread_map Detected via gcc's ASan: Direct leak of 2048 byte(s) in 64 object(s) allocated from: 6 #0 0x7f606512e370 in __interceptor_realloc (/usr/lib/x86_64-linux-gnu/libasan.so.5+0xee370) 7 #1 0x556b0f1d7ddd in thread_map__realloc util/thread_map.c:43 8 #2 0x556b0f1d84c7 in thread_map__new_by_tid util/thread_map.c:85 9 #3 0x556b0f0e045e in is_event_supported util/parse-events.c:2250 10 #4 0x556b0f0e1aa1 in print_hwcache_events util/parse-events.c:2382 11 #5 0x556b0f0e3231 in print_events util/parse-events.c:2514 12 #6 0x556b0ee0a66e in cmd_list /home/changbin/work/linux/tools/perf/builtin-list.c:58 13 #7 0x556b0f01e0ae in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 14 #8 0x556b0f01e859 in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 15 #9 0x556b0f01edc8 in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 16 #10 0x556b0f01f71f in main /home/changbin/work/linux/tools/perf/perf.c:520 17 #11 0x7f6062ccf09a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: 89896051f8da ("perf tools: Do not put a variable sized type not at the end of a struct") Link: http://lkml.kernel.org/r/20190316080556.3075-3-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 4dcc01b2532c..2e9035c4c252 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2271,6 +2271,7 @@ static bool is_event_supported(u8 type, unsigned config) perf_evsel__delete(evsel); } + thread_map__put(tmap); return ret; } -- cgit From 11c1ea6f1a9bc97bf857fd12f72eacb6c69794e2 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:43 +0800 Subject: perf tools: Fix errors under optimization level '-Og' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Optimization level '-Og' offers a reasonable level of optimization while maintaining fast compilation and a good debugging experience. This patch tries to make it work. $ make DEBUG=1 EXTRA_CFLAGS='-Og' bench/epoll-ctl.c: In function ‘do_threads’: bench/epoll-ctl.c:274:9: error: ‘ret’ may be used uninitialized in this function [-Werror=maybe-uninitialized] return ret; ^~~ ... Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lkml.kernel.org/r/20190316080556.3075-4-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/bpf/libbpf.c | 2 +- tools/perf/bench/epoll-ctl.c | 2 +- tools/perf/bench/epoll-wait.c | 2 +- tools/perf/tests/backward-ring-buffer.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index f5eb60379c8d..4884557aa17f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -622,7 +622,7 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) bool strict = !(flags & MAPS_RELAX_COMPAT); int i, map_idx, map_def_sz, nr_maps = 0; Elf_Scn *scn; - Elf_Data *data; + Elf_Data *data = NULL; Elf_Data *symbols = obj->efile.symbols; if (obj->efile.maps_shndx < 0) diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c index 0c0a6e824934..2af067859966 100644 --- a/tools/perf/bench/epoll-ctl.c +++ b/tools/perf/bench/epoll-ctl.c @@ -224,7 +224,7 @@ static int do_threads(struct worker *worker, struct cpu_map *cpu) pthread_attr_t thread_attr, *attrp = NULL; cpu_set_t cpuset; unsigned int i, j; - int ret; + int ret = 0; if (!noaffinity) pthread_attr_init(&thread_attr); diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c index 5a11534e96a0..fe85448abd45 100644 --- a/tools/perf/bench/epoll-wait.c +++ b/tools/perf/bench/epoll-wait.c @@ -293,7 +293,7 @@ static int do_threads(struct worker *worker, struct cpu_map *cpu) pthread_attr_t thread_attr, *attrp = NULL; cpu_set_t cpuset; unsigned int i, j; - int ret, events = EPOLLIN; + int ret = 0, events = EPOLLIN; if (oneshot) events |= EPOLLONESHOT; diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 6d598cc071ae..1a9c3becf5ff 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -18,7 +18,7 @@ static void testcase(void) int i; for (i = 0; i < NR_ITERS; i++) { - char proc_name[10]; + char proc_name[15]; snprintf(proc_name, sizeof(proc_name), "p:%d\n", i); prctl(PR_SET_NAME, proc_name); -- cgit From 9b40dff7ba3caaf0d1919f98e136fa3400bd34aa Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:44 +0800 Subject: perf config: Fix an error in the config template documentation The option 'sort-order' should be 'sort_order'. Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: 893c5c798be9 ("perf config: Show default report configuration in example and docs") Link: http://lkml.kernel.org/r/20190316080556.3075-5-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-config.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 95054a8176a2..462b3cde0675 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -114,7 +114,7 @@ Given a $HOME/.perfconfig like this: [report] # Defaults - sort-order = comm,dso,symbol + sort_order = comm,dso,symbol percent-limit = 0 queue-size = 0 children = true -- cgit From 54569ba4b06d5baedae4614bde33a25a191473ba Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:45 +0800 Subject: perf config: Fix a memory leak in collect_config() Detected with gcc's ASan: Direct leak of 66 byte(s) in 5 object(s) allocated from: #0 0x7ff3b1f32070 in __interceptor_strdup (/usr/lib/x86_64-linux-gnu/libasan.so.5+0x3b070) #1 0x560c8761034d in collect_config util/config.c:597 #2 0x560c8760d9cb in get_value util/config.c:169 #3 0x560c8760dfd7 in perf_parse_file util/config.c:285 #4 0x560c8760e0d2 in perf_config_from_file util/config.c:476 #5 0x560c876108fd in perf_config_set__init util/config.c:661 #6 0x560c87610c72 in perf_config_set__new util/config.c:709 #7 0x560c87610d2f in perf_config__init util/config.c:718 #8 0x560c87610e5d in perf_config util/config.c:730 #9 0x560c875ddea0 in main /home/changbin/work/linux/tools/perf/perf.c:442 #10 0x7ff3afb8609a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Cc: Taeung Song Fixes: 20105ca1240c ("perf config: Introduce perf_config_set class") Link: http://lkml.kernel.org/r/20190316080556.3075-6-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/config.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index fa092511c52b..7e3c1b60120c 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -633,11 +633,10 @@ static int collect_config(const char *var, const char *value, } ret = set_value(item, value); - return ret; out_free: free(key); - return -1; + return ret; } int perf_config_set__collect(struct perf_config_set *set, const char *file_name, -- cgit From 8bde8516893da5a5fdf06121f74d11b52ab92df5 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:46 +0800 Subject: perf build-id: Fix memory leak in print_sdt_events() Detected with gcc's ASan: Direct leak of 4356 byte(s) in 120 object(s) allocated from: #0 0x7ff1a2b5a070 in __interceptor_strdup (/usr/lib/x86_64-linux-gnu/libasan.so.5+0x3b070) #1 0x55719aef4814 in build_id_cache__origname util/build-id.c:215 #2 0x55719af649b6 in print_sdt_events util/parse-events.c:2339 #3 0x55719af66272 in print_events util/parse-events.c:2542 #4 0x55719ad1ecaa in cmd_list /home/changbin/work/linux/tools/perf/builtin-list.c:58 #5 0x55719aec745d in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 #6 0x55719aec7d1a in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 #7 0x55719aec8184 in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 #8 0x55719aeca41a in main /home/changbin/work/linux/tools/perf/perf.c:520 #9 0x7ff1a07ae09a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: 40218daea1db ("perf list: Show SDT and pre-cached events") Link: http://lkml.kernel.org/r/20190316080556.3075-7-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/build-id.c | 1 + tools/perf/util/parse-events.c | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index bff0d17920ed..0c5517a8d0b7 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -185,6 +185,7 @@ char *build_id_cache__linkname(const char *sbuild_id, char *bf, size_t size) return bf; } +/* The caller is responsible to free the returned buffer. */ char *build_id_cache__origname(const char *sbuild_id) { char *linkname; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 2e9035c4c252..5ef4939408f2 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2342,6 +2342,7 @@ void print_sdt_events(const char *subsys_glob, const char *event_glob, printf(" %-50s [%s]\n", buf, "SDT event"); free(buf); } + free(path); } else printf(" %-50s [%s]\n", nd->s, "SDT event"); if (nd2) { -- cgit From 0dba9e4be95b59e77060645ca8e37ca3231061f5 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:47 +0800 Subject: perf top: Delete the evlist before perf_session, fixing heap-use-after-free issue The evlist should be destroyed before the perf session. Detected with gcc's ASan: ================================================================= ==27350==ERROR: AddressSanitizer: heap-use-after-free on address 0x62b000002e38 at pc 0x5611da276999 bp 0x7ffce8f1d1a0 sp 0x7ffce8f1d190 WRITE of size 8 at 0x62b000002e38 thread T0 #0 0x5611da276998 in __list_del /home/work/linux/tools/include/linux/list.h:89 #1 0x5611da276d4a in __list_del_entry /home/work/linux/tools/include/linux/list.h:102 #2 0x5611da276e77 in list_del_init /home/work/linux/tools/include/linux/list.h:145 #3 0x5611da2781cd in thread__put util/thread.c:130 #4 0x5611da2cc0a8 in __thread__zput util/thread.h:68 #5 0x5611da2d2dcb in hist_entry__delete util/hist.c:1148 #6 0x5611da2cdf91 in hists__delete_entry util/hist.c:337 #7 0x5611da2ce19e in hists__delete_entries util/hist.c:365 #8 0x5611da2db2ab in hists__delete_all_entries util/hist.c:2639 #9 0x5611da2db325 in hists_evsel__exit util/hist.c:2651 #10 0x5611da1c5352 in perf_evsel__exit util/evsel.c:1304 #11 0x5611da1c5390 in perf_evsel__delete util/evsel.c:1309 #12 0x5611da1b35f0 in perf_evlist__purge util/evlist.c:124 #13 0x5611da1b38e2 in perf_evlist__delete util/evlist.c:148 #14 0x5611da069781 in cmd_top /home/changbin/work/linux/tools/perf/builtin-top.c:1645 #15 0x5611da17d038 in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 #16 0x5611da17d577 in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 #17 0x5611da17d97b in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 #18 0x5611da17e0e9 in main /home/changbin/work/linux/tools/perf/perf.c:520 #19 0x7fdcc970f09a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) #20 0x5611d9ff35c9 in _start (/home/work/linux/tools/perf/perf+0x3e95c9) 0x62b000002e38 is located 11320 bytes inside of 27448-byte region [0x62b000000200,0x62b000006d38) freed by thread T0 here: #0 0x7fdccb04ab70 in free (/usr/lib/x86_64-linux-gnu/libasan.so.5+0xedb70) #1 0x5611da260df4 in perf_session__delete util/session.c:201 #2 0x5611da063de5 in __cmd_top /home/changbin/work/linux/tools/perf/builtin-top.c:1300 #3 0x5611da06973c in cmd_top /home/changbin/work/linux/tools/perf/builtin-top.c:1642 #4 0x5611da17d038 in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 #5 0x5611da17d577 in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 #6 0x5611da17d97b in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 #7 0x5611da17e0e9 in main /home/changbin/work/linux/tools/perf/perf.c:520 #8 0x7fdcc970f09a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) previously allocated by thread T0 here: #0 0x7fdccb04b138 in calloc (/usr/lib/x86_64-linux-gnu/libasan.so.5+0xee138) #1 0x5611da26010c in zalloc util/util.h:23 #2 0x5611da260824 in perf_session__new util/session.c:118 #3 0x5611da0633a6 in __cmd_top /home/changbin/work/linux/tools/perf/builtin-top.c:1192 #4 0x5611da06973c in cmd_top /home/changbin/work/linux/tools/perf/builtin-top.c:1642 #5 0x5611da17d038 in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 #6 0x5611da17d577 in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 #7 0x5611da17d97b in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 #8 0x5611da17e0e9 in main /home/changbin/work/linux/tools/perf/perf.c:520 #9 0x7fdcc970f09a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) SUMMARY: AddressSanitizer: heap-use-after-free /home/work/linux/tools/include/linux/list.h:89 in __list_del Shadow bytes around the buggy address: 0x0c567fff8570: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd 0x0c567fff8580: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd 0x0c567fff8590: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd 0x0c567fff85a0: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd 0x0c567fff85b0: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd =>0x0c567fff85c0: fd fd fd fd fd fd fd[fd]fd fd fd fd fd fd fd fd 0x0c567fff85d0: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd 0x0c567fff85e0: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd 0x0c567fff85f0: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd 0x0c567fff8600: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd 0x0c567fff8610: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd Shadow byte legend (one shadow byte represents 8 application bytes): Addressable: 00 Partially addressable: 01 02 03 04 05 06 07 Heap left redzone: fa Freed heap region: fd Stack left redzone: f1 Stack mid redzone: f2 Stack right redzone: f3 Stack after return: f5 Stack use after scope: f8 Global redzone: f9 Global init order: f6 Poisoned by user: f7 Container overflow: fc Array cookie: ac Intra object redzone: bb ASan internal: fe Left alloca redzone: ca Right alloca redzone: cb ==27350==ABORTING Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lkml.kernel.org/r/20190316080556.3075-8-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 42 +++++++++++++++++++----------------------- 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 231a90daa958..614f278235fa 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1189,23 +1189,19 @@ static int __cmd_top(struct perf_top *top) pthread_t thread, thread_process; int ret; - top->session = perf_session__new(NULL, false, NULL); - if (top->session == NULL) - return -1; - if (!top->annotation_opts.objdump_path) { ret = perf_env__lookup_objdump(&top->session->header.env, &top->annotation_opts.objdump_path); if (ret) - goto out_delete; + return ret; } ret = callchain_param__setup_sample_type(&callchain_param); if (ret) - goto out_delete; + return ret; if (perf_session__register_idle_thread(top->session) < 0) - goto out_delete; + return ret; if (top->nr_threads_synthesize > 1) perf_set_multithreaded(); @@ -1227,13 +1223,18 @@ static int __cmd_top(struct perf_top *top) if (perf_hpp_list.socket) { ret = perf_env__read_cpu_topology_map(&perf_env); - if (ret < 0) - goto out_err_cpu_topo; + if (ret < 0) { + char errbuf[BUFSIZ]; + const char *err = str_error_r(-ret, errbuf, sizeof(errbuf)); + + ui__error("Could not read the CPU topology map: %s\n", err); + return ret; + } } ret = perf_top__start_counters(top); if (ret) - goto out_delete; + return ret; top->session->evlist = top->evlist; perf_session__set_id_hdr_size(top->session); @@ -1252,7 +1253,7 @@ static int __cmd_top(struct perf_top *top) ret = -1; if (pthread_create(&thread_process, NULL, process_thread, top)) { ui__error("Could not create process thread.\n"); - goto out_delete; + return ret; } if (pthread_create(&thread, NULL, (use_browser > 0 ? display_thread_tui : @@ -1296,19 +1297,7 @@ out_join: out_join_thread: pthread_cond_signal(&top->qe.cond); pthread_join(thread_process, NULL); -out_delete: - perf_session__delete(top->session); - top->session = NULL; - return ret; - -out_err_cpu_topo: { - char errbuf[BUFSIZ]; - const char *err = str_error_r(-ret, errbuf, sizeof(errbuf)); - - ui__error("Could not read the CPU topology map: %s\n", err); - goto out_delete; -} } static int @@ -1639,10 +1628,17 @@ int cmd_top(int argc, const char **argv) signal(SIGWINCH, winch_sig); } + top.session = perf_session__new(NULL, false, NULL); + if (top.session == NULL) { + status = -1; + goto out_delete_evlist; + } + status = __cmd_top(&top); out_delete_evlist: perf_evlist__delete(top.evlist); + perf_session__delete(top.session); return status; } -- cgit From 70c819e4bf1c5f492768b399d898d458ccdad2b6 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:48 +0800 Subject: perf top: Fix error handling in cmd_top() We should go to the cleanup path, to avoid leaks, detected using gcc's ASan. Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lkml.kernel.org/r/20190316080556.3075-9-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 614f278235fa..2508a7a552fa 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1617,8 +1617,9 @@ int cmd_top(int argc, const char **argv) annotation_config__init(); symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); - if (symbol__init(NULL) < 0) - return -1; + status = symbol__init(NULL); + if (status < 0) + goto out_delete_evlist; sort__setup_elide(stdout); -- cgit From cb6186aeffda4d27e56066c79e9579e7831541d3 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:49 +0800 Subject: perf hist: Add missing map__put() in error case We need to map__put() before returning from failure of sample__resolve_callchain(). Detected with gcc's ASan. Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Krister Johansen Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: 9c68ae98c6f7 ("perf callchain: Reference count maps") Link: http://lkml.kernel.org/r/20190316080556.3075-10-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/hist.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 1f230285d78a..7ace7a10054d 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1111,8 +1111,10 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent, iter->evsel, al, max_stack_depth); - if (err) + if (err) { + map__put(alm); return err; + } err = iter->ops->prepare_entry(iter, al); if (err) -- cgit From b49265e04410b97b31a5ee66ef6782c1b2d6cd2c Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:50 +0800 Subject: perf map: Remove map from 'names' tree in __maps__remove() There are two trees for each map inserted by maps__insert(), so remove it from the 'names' tree in __maps__remove(). Detected with gcc's ASan. Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Eric Saint-Etienne Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: 1e6285699b30 ("perf symbols: Fix slowness due to -ffunction-section") Link: http://lkml.kernel.org/r/20190316080556.3075-11-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index fbeb0c6efaa6..64bea5eb8bf6 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -917,6 +917,9 @@ static void __maps__remove(struct maps *maps, struct map *map) { rb_erase_init(&map->rb_node, &maps->entries); map__put(map); + + rb_erase_init(&map->rb_node_name, &maps->names); + map__put(map); } void maps__remove(struct maps *maps, struct map *map) -- cgit From da3a53a7390a89391bd63bead0c2e9af4c5ef3d6 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:51 +0800 Subject: perf maps: Purge all maps from the 'names' tree Add function __maps__purge_names() to purge all maps from the names tree. We need to cleanup the names tree in maps__exit(). Detected with gcc's ASan. Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Eric Saint-Etienne Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: 1e6285699b30 ("perf symbols: Fix slowness due to -ffunction-section") Link: http://lkml.kernel.org/r/20190316080556.3075-12-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 64bea5eb8bf6..e32628cd20a7 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -577,10 +577,25 @@ static void __maps__purge(struct maps *maps) } } +static void __maps__purge_names(struct maps *maps) +{ + struct rb_root *root = &maps->names; + struct rb_node *next = rb_first(root); + + while (next) { + struct map *pos = rb_entry(next, struct map, rb_node_name); + + next = rb_next(&pos->rb_node_name); + rb_erase_init(&pos->rb_node_name, root); + map__put(pos); + } +} + static void maps__exit(struct maps *maps) { down_write(&maps->lock); __maps__purge(maps); + __maps__purge_names(maps); up_write(&maps->lock); } -- cgit From 1e5b0cf8672e622257df024074e6e09bfbcb7750 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:52 +0800 Subject: perf top: Fix global-buffer-overflow issue The array str[] should have six elements. ================================================================= ==4322==ERROR: AddressSanitizer: global-buffer-overflow on address 0x56463844e300 at pc 0x564637e7ad0d bp 0x7f30c8c89d10 sp 0x7f30c8c89d00 READ of size 8 at 0x56463844e300 thread T9 #0 0x564637e7ad0c in __ordered_events__flush util/ordered-events.c:316 #1 0x564637e7b0e4 in ordered_events__flush util/ordered-events.c:338 #2 0x564637c6a57d in process_thread /home/changbin/work/linux/tools/perf/builtin-top.c:1073 #3 0x7f30d173a163 in start_thread (/lib/x86_64-linux-gnu/libpthread.so.0+0x8163) #4 0x7f30cfffbdee in __clone (/lib/x86_64-linux-gnu/libc.so.6+0x11adee) 0x56463844e300 is located 32 bytes to the left of global variable 'flags' defined in 'util/trace-event-parse.c:229:26' (0x56463844e320) of size 192 0x56463844e300 is located 0 bytes to the right of global variable 'str' defined in 'util/ordered-events.c:268:28' (0x56463844e2e0) of size 32 SUMMARY: AddressSanitizer: global-buffer-overflow util/ordered-events.c:316 in __ordered_events__flush Shadow bytes around the buggy address: 0x0ac947081c10: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ac947081c20: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ac947081c30: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ac947081c40: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ac947081c50: 00 00 00 00 00 00 00 00 f9 f9 f9 f9 00 00 00 00 =>0x0ac947081c60:[f9]f9 f9 f9 00 00 00 00 00 00 00 00 00 00 00 00 0x0ac947081c70: 00 00 00 00 00 00 00 00 00 00 00 00 f9 f9 f9 f9 0x0ac947081c80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ac947081c90: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ac947081ca0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x0ac947081cb0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Shadow byte legend (one shadow byte represents 8 application bytes): Addressable: 00 Partially addressable: 01 02 03 04 05 06 07 Heap left redzone: fa Freed heap region: fd Stack left redzone: f1 Stack mid redzone: f2 Stack right redzone: f3 Stack after return: f5 Stack use after scope: f8 Global redzone: f9 Global init order: f6 Poisoned by user: f7 Container overflow: fc Array cookie: ac Intra object redzone: bb ASan internal: fe Left alloca redzone: ca Right alloca redzone: cb Thread T9 created by T0 here: #0 0x7f30d179de5f in __interceptor_pthread_create (/usr/lib/x86_64-linux-gnu/libasan.so.5+0x4ae5f) #1 0x564637c6b954 in __cmd_top /home/changbin/work/linux/tools/perf/builtin-top.c:1253 #2 0x564637c7173c in cmd_top /home/changbin/work/linux/tools/perf/builtin-top.c:1642 #3 0x564637d85038 in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 #4 0x564637d85577 in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 #5 0x564637d8597b in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 #6 0x564637d860e9 in main /home/changbin/work/linux/tools/perf/perf.c:520 #7 0x7f30cff0509a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Cc: Jiri Olsa Fixes: 16c66bc167cc ("perf top: Add processing thread") Fixes: 68ca5d07de20 ("perf ordered_events: Add ordered_events__flush_time interface") Link: http://lkml.kernel.org/r/20190316080556.3075-13-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/ordered-events.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index ea523d3b248f..989fed6f43b5 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -270,6 +270,8 @@ static int __ordered_events__flush(struct ordered_events *oe, enum oe_flush how, "FINAL", "ROUND", "HALF ", + "TOP ", + "TIME ", }; int err; bool show_progress = false; -- cgit From 42dfa451d825a2ad15793c476f73e7bbc0f9d312 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 18 Mar 2019 16:41:28 -0300 Subject: perf evsel: Free evsel->counts in perf_evsel__exit() Using gcc's ASan, Changbin reports: ================================================================= ==7494==ERROR: LeakSanitizer: detected memory leaks Direct leak of 48 byte(s) in 1 object(s) allocated from: #0 0x7f0333a89138 in calloc (/usr/lib/x86_64-linux-gnu/libasan.so.5+0xee138) #1 0x5625e5330a5e in zalloc util/util.h:23 #2 0x5625e5330a9b in perf_counts__new util/counts.c:10 #3 0x5625e5330ca0 in perf_evsel__alloc_counts util/counts.c:47 #4 0x5625e520d8e5 in __perf_evsel__read_on_cpu util/evsel.c:1505 #5 0x5625e517a985 in perf_evsel__read_on_cpu /home/work/linux/tools/perf/util/evsel.h:347 #6 0x5625e517ad1a in test__openat_syscall_event tests/openat-syscall.c:47 #7 0x5625e51528e6 in run_test tests/builtin-test.c:358 #8 0x5625e5152baf in test_and_print tests/builtin-test.c:388 #9 0x5625e51543fe in __cmd_test tests/builtin-test.c:583 #10 0x5625e515572f in cmd_test tests/builtin-test.c:722 #11 0x5625e51c3fb8 in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 #12 0x5625e51c44f7 in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 #13 0x5625e51c48fb in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 #14 0x5625e51c5069 in main /home/changbin/work/linux/tools/perf/perf.c:520 #15 0x7f033214d09a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) Indirect leak of 72 byte(s) in 1 object(s) allocated from: #0 0x7f0333a89138 in calloc (/usr/lib/x86_64-linux-gnu/libasan.so.5+0xee138) #1 0x5625e532560d in zalloc util/util.h:23 #2 0x5625e532566b in xyarray__new util/xyarray.c:10 #3 0x5625e5330aba in perf_counts__new util/counts.c:15 #4 0x5625e5330ca0 in perf_evsel__alloc_counts util/counts.c:47 #5 0x5625e520d8e5 in __perf_evsel__read_on_cpu util/evsel.c:1505 #6 0x5625e517a985 in perf_evsel__read_on_cpu /home/work/linux/tools/perf/util/evsel.h:347 #7 0x5625e517ad1a in test__openat_syscall_event tests/openat-syscall.c:47 #8 0x5625e51528e6 in run_test tests/builtin-test.c:358 #9 0x5625e5152baf in test_and_print tests/builtin-test.c:388 #10 0x5625e51543fe in __cmd_test tests/builtin-test.c:583 #11 0x5625e515572f in cmd_test tests/builtin-test.c:722 #12 0x5625e51c3fb8 in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 #13 0x5625e51c44f7 in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 #14 0x5625e51c48fb in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 #15 0x5625e51c5069 in main /home/changbin/work/linux/tools/perf/perf.c:520 #16 0x7f033214d09a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) His patch took care of evsel->prev_raw_counts, but the above backtraces are about evsel->counts, so fix that instead. Reported-by: Changbin Du Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: https://lkml.kernel.org/n/tip-hd1x13g59f0nuhe4anxhsmfp@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 16f02b83b23f..1a2023da5d9c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1292,6 +1292,7 @@ void perf_evsel__exit(struct perf_evsel *evsel) { assert(list_empty(&evsel->node)); assert(evsel->evlist == NULL); + perf_evsel__free_counts(evsel); perf_evsel__free_fd(evsel); perf_evsel__free_id(evsel); perf_evsel__free_config_terms(evsel); -- cgit From 93faa52e8371f0291ee1ff4994edae2b336b6233 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:54 +0800 Subject: perf tests: Fix a memory leak of cpu_map object in the openat_syscall_event_on_all_cpus test ================================================================= ==7497==ERROR: LeakSanitizer: detected memory leaks Direct leak of 40 byte(s) in 1 object(s) allocated from: #0 0x7f0333a88f30 in __interceptor_malloc (/usr/lib/x86_64-linux-gnu/libasan.so.5+0xedf30) #1 0x5625e5326213 in cpu_map__trim_new util/cpumap.c:45 #2 0x5625e5326703 in cpu_map__read util/cpumap.c:103 #3 0x5625e53267ef in cpu_map__read_all_cpu_map util/cpumap.c:120 #4 0x5625e5326915 in cpu_map__new util/cpumap.c:135 #5 0x5625e517b355 in test__openat_syscall_event_on_all_cpus tests/openat-syscall-all-cpus.c:36 #6 0x5625e51528e6 in run_test tests/builtin-test.c:358 #7 0x5625e5152baf in test_and_print tests/builtin-test.c:388 #8 0x5625e51543fe in __cmd_test tests/builtin-test.c:583 #9 0x5625e515572f in cmd_test tests/builtin-test.c:722 #10 0x5625e51c3fb8 in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 #11 0x5625e51c44f7 in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 #12 0x5625e51c48fb in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 #13 0x5625e51c5069 in main /home/changbin/work/linux/tools/perf/perf.c:520 #14 0x7f033214d09a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: f30a79b012e5 ("perf tools: Add reference counting for cpu_map object") Link: http://lkml.kernel.org/r/20190316080556.3075-15-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/openat-syscall-all-cpus.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index c531e6deb104..493ecb611540 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -45,7 +45,7 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int if (IS_ERR(evsel)) { tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat"); pr_debug("%s\n", errbuf); - goto out_thread_map_delete; + goto out_cpu_map_delete; } if (perf_evsel__open(evsel, cpus, threads) < 0) { @@ -119,6 +119,8 @@ out_close_fd: perf_evsel__close_fd(evsel); out_evsel_delete: perf_evsel__delete(evsel); +out_cpu_map_delete: + cpu_map__put(cpus); out_thread_map_delete: thread_map__put(threads); return err; -- cgit From f97a8991d3b998e518f56794d879f645964de649 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:55 +0800 Subject: perf tests: Fix memory leak by expr__find_other() in test__expr() ================================================================= ==7506==ERROR: LeakSanitizer: detected memory leaks Direct leak of 13 byte(s) in 3 object(s) allocated from: #0 0x7f03339d6070 in __interceptor_strdup (/usr/lib/x86_64-linux-gnu/libasan.so.5+0x3b070) #1 0x5625e53aaef0 in expr__find_other util/expr.y:221 #2 0x5625e51bcd3f in test__expr tests/expr.c:52 #3 0x5625e51528e6 in run_test tests/builtin-test.c:358 #4 0x5625e5152baf in test_and_print tests/builtin-test.c:388 #5 0x5625e51543fe in __cmd_test tests/builtin-test.c:583 #6 0x5625e515572f in cmd_test tests/builtin-test.c:722 #7 0x5625e51c3fb8 in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 #8 0x5625e51c44f7 in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 #9 0x5625e51c48fb in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 #10 0x5625e51c5069 in main /home/changbin/work/linux/tools/perf/perf.c:520 #11 0x7f033214d09a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) Signed-off-by: Changbin Du Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Daniel Borkmann Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: 075167363f8b ("perf tools: Add a simple expression parser for JSON") Link: http://lkml.kernel.org/r/20190316080556.3075-16-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/expr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 01f0706995a9..9acc1e80b936 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -19,7 +19,7 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) const char *p; const char **other; double val; - int ret; + int i, ret; struct parse_ctx ctx; int num_other; @@ -56,6 +56,9 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) TEST_ASSERT_VAL("find other", !strcmp(other[1], "BAZ")); TEST_ASSERT_VAL("find other", !strcmp(other[2], "BOZO")); TEST_ASSERT_VAL("find other", other[3] == NULL); + + for (i = 0; i < num_other; i++) + free((void *)other[i]); free((void *)other); return 0; -- cgit From d982b33133284fa7efa0e52ae06b88f9be3ea764 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:56 +0800 Subject: perf tests: Fix a memory leak in test__perf_evsel__tp_sched_test() ================================================================= ==20875==ERROR: LeakSanitizer: detected memory leaks Direct leak of 1160 byte(s) in 1 object(s) allocated from: #0 0x7f1b6fc84138 in calloc (/usr/lib/x86_64-linux-gnu/libasan.so.5+0xee138) #1 0x55bd50005599 in zalloc util/util.h:23 #2 0x55bd500068f5 in perf_evsel__newtp_idx util/evsel.c:327 #3 0x55bd4ff810fc in perf_evsel__newtp /home/work/linux/tools/perf/util/evsel.h:216 #4 0x55bd4ff81608 in test__perf_evsel__tp_sched_test tests/evsel-tp-sched.c:69 #5 0x55bd4ff528e6 in run_test tests/builtin-test.c:358 #6 0x55bd4ff52baf in test_and_print tests/builtin-test.c:388 #7 0x55bd4ff543fe in __cmd_test tests/builtin-test.c:583 #8 0x55bd4ff5572f in cmd_test tests/builtin-test.c:722 #9 0x55bd4ffc4087 in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 #10 0x55bd4ffc45c6 in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 #11 0x55bd4ffc49ca in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 #12 0x55bd4ffc5138 in main /home/changbin/work/linux/tools/perf/perf.c:520 #13 0x7f1b6e34809a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) Indirect leak of 19 byte(s) in 1 object(s) allocated from: #0 0x7f1b6fc83f30 in __interceptor_malloc (/usr/lib/x86_64-linux-gnu/libasan.so.5+0xedf30) #1 0x7f1b6e3ac30f in vasprintf (/lib/x86_64-linux-gnu/libc.so.6+0x8830f) Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: 6a6cd11d4e57 ("perf test: Add test for the sched tracepoint format fields") Link: http://lkml.kernel.org/r/20190316080556.3075-17-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/evsel-tp-sched.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index ea7acf403727..71f60c0f9faa 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -85,5 +85,6 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes if (perf_evsel__test_field(evsel, "target_cpu", 4, true)) ret = -1; + perf_evsel__delete(evsel); return ret; } -- cgit From 71184c6ab7e60fd59d8dbc8fed62a1c753dc4934 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 11 Mar 2019 22:30:37 -0700 Subject: perf record: Replace option --bpf-event with --no-bpf-event Currently, monitoring of BPF programs through bpf_event is off by default for 'perf record'. To turn it on, the user need to use option "--bpf-event". As BPF gets wider adoption in different subsystems, this option becomes inconvenient. This patch makes bpf_event on by default, and adds option "--no-bpf-event" to turn it off. Since option --bpf-event is not released yet, it is safe to remove it. Signed-off-by: Song Liu Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: kernel-team@fb.com Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stanislav Fomichev Link: http://lkml.kernel.org/r/20190312053051.2690567-2-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- tools/perf/perf.h | 2 +- tools/perf/util/bpf-event.c | 2 +- tools/perf/util/evsel.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index e7144a1c1c82..f29874192d3e 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1891,7 +1891,7 @@ static struct option __record_options[] = { OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize, "synthesize non-sample events at the end of output"), OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"), - OPT_BOOLEAN(0, "bpf-event", &record.opts.bpf_event, "record bpf events"), + OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"), OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq, "Fail if the specified frequency can't be used"), OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'", diff --git a/tools/perf/perf.h b/tools/perf/perf.h index b120e547ddc7..c59743def8d3 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -66,7 +66,7 @@ struct record_opts { bool ignore_missing_thread; bool strict_freq; bool sample_id; - bool bpf_event; + bool no_bpf_event; unsigned int freq; unsigned int mmap_pages; unsigned int auxtrace_mmap_pages; diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 028c8ec1f62a..ea012b735a37 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -187,7 +187,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool, } /* Synthesize PERF_RECORD_BPF_EVENT */ - if (opts->bpf_event) { + if (!opts->no_bpf_event) { *bpf_event = (struct bpf_event){ .header = { .type = PERF_RECORD_BPF_EVENT, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1a2023da5d9c..7835e05f0c0a 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1036,7 +1036,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, attr->mmap2 = track && !perf_missing_features.mmap2; attr->comm = track; attr->ksymbol = track && !perf_missing_features.ksymbol; - attr->bpf_event = track && opts->bpf_event && + attr->bpf_event = track && !opts->no_bpf_event && !perf_missing_features.bpf_event; if (opts->record_namespaces) -- cgit From 34be16466d4dc06f3d604dafbcdb3327b72e78da Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 11 Mar 2019 22:30:38 -0700 Subject: tools lib bpf: Introduce bpf_program__get_prog_info_linear() Currently, bpf_prog_info includes 9 arrays. The user has the option to fetch any combination of these arrays. However, this requires a lot of handling. This work becomes more tricky when we need to store bpf_prog_info to a file, because these arrays are allocated independently. This patch introduces 'struct bpf_prog_info_linear', which stores arrays of bpf_prog_info in continuous memory. Helper functions are introduced to unify the work to get different sets of bpf_prog_info. Specifically, bpf_program__get_prog_info_linear() allows the user to select which arrays to fetch, and handles details for the user. Please see the comments right before 'enum bpf_prog_info_array' for more details and examples. Signed-off-by: Song Liu Reviewed-by: Jiri Olsa Acked-by: Daniel Borkmann Link: https://lkml.kernel.org/r/ce92c091-e80d-a0c1-4aa0-987706c42b20@iogearbox.net Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: kernel-team@fb.com Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stanislav Fomichev Link: http://lkml.kernel.org/r/20190312053051.2690567-3-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/bpf/libbpf.c | 251 +++++++++++++++++++++++++++++++++++++++++++++++ tools/lib/bpf/libbpf.h | 63 ++++++++++++ tools/lib/bpf/libbpf.map | 3 + 3 files changed, 317 insertions(+) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 4884557aa17f..8fb6e89b4b2c 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -112,6 +112,11 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...) # define LIBBPF_ELF_C_READ_MMAP ELF_C_READ #endif +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64) (unsigned long) ptr; +} + struct bpf_capabilities { /* v4.14: kernel support for program & map names. */ __u32 name:1; @@ -2997,3 +3002,249 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, ring_buffer_write_tail(header, data_tail); return ret; } + +struct bpf_prog_info_array_desc { + int array_offset; /* e.g. offset of jited_prog_insns */ + int count_offset; /* e.g. offset of jited_prog_len */ + int size_offset; /* > 0: offset of rec size, + * < 0: fix size of -size_offset + */ +}; + +static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = { + [BPF_PROG_INFO_JITED_INSNS] = { + offsetof(struct bpf_prog_info, jited_prog_insns), + offsetof(struct bpf_prog_info, jited_prog_len), + -1, + }, + [BPF_PROG_INFO_XLATED_INSNS] = { + offsetof(struct bpf_prog_info, xlated_prog_insns), + offsetof(struct bpf_prog_info, xlated_prog_len), + -1, + }, + [BPF_PROG_INFO_MAP_IDS] = { + offsetof(struct bpf_prog_info, map_ids), + offsetof(struct bpf_prog_info, nr_map_ids), + -(int)sizeof(__u32), + }, + [BPF_PROG_INFO_JITED_KSYMS] = { + offsetof(struct bpf_prog_info, jited_ksyms), + offsetof(struct bpf_prog_info, nr_jited_ksyms), + -(int)sizeof(__u64), + }, + [BPF_PROG_INFO_JITED_FUNC_LENS] = { + offsetof(struct bpf_prog_info, jited_func_lens), + offsetof(struct bpf_prog_info, nr_jited_func_lens), + -(int)sizeof(__u32), + }, + [BPF_PROG_INFO_FUNC_INFO] = { + offsetof(struct bpf_prog_info, func_info), + offsetof(struct bpf_prog_info, nr_func_info), + offsetof(struct bpf_prog_info, func_info_rec_size), + }, + [BPF_PROG_INFO_LINE_INFO] = { + offsetof(struct bpf_prog_info, line_info), + offsetof(struct bpf_prog_info, nr_line_info), + offsetof(struct bpf_prog_info, line_info_rec_size), + }, + [BPF_PROG_INFO_JITED_LINE_INFO] = { + offsetof(struct bpf_prog_info, jited_line_info), + offsetof(struct bpf_prog_info, nr_jited_line_info), + offsetof(struct bpf_prog_info, jited_line_info_rec_size), + }, + [BPF_PROG_INFO_PROG_TAGS] = { + offsetof(struct bpf_prog_info, prog_tags), + offsetof(struct bpf_prog_info, nr_prog_tags), + -(int)sizeof(__u8) * BPF_TAG_SIZE, + }, + +}; + +static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info, int offset) +{ + __u32 *array = (__u32 *)info; + + if (offset >= 0) + return array[offset / sizeof(__u32)]; + return -(int)offset; +} + +static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info, int offset) +{ + __u64 *array = (__u64 *)info; + + if (offset >= 0) + return array[offset / sizeof(__u64)]; + return -(int)offset; +} + +static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset, + __u32 val) +{ + __u32 *array = (__u32 *)info; + + if (offset >= 0) + array[offset / sizeof(__u32)] = val; +} + +static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset, + __u64 val) +{ + __u64 *array = (__u64 *)info; + + if (offset >= 0) + array[offset / sizeof(__u64)] = val; +} + +struct bpf_prog_info_linear * +bpf_program__get_prog_info_linear(int fd, __u64 arrays) +{ + struct bpf_prog_info_linear *info_linear; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + __u32 data_len = 0; + int i, err; + void *ptr; + + if (arrays >> BPF_PROG_INFO_LAST_ARRAY) + return ERR_PTR(-EINVAL); + + /* step 1: get array dimensions */ + err = bpf_obj_get_info_by_fd(fd, &info, &info_len); + if (err) { + pr_debug("can't get prog info: %s", strerror(errno)); + return ERR_PTR(-EFAULT); + } + + /* step 2: calculate total size of all arrays */ + for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { + bool include_array = (arrays & (1UL << i)) > 0; + struct bpf_prog_info_array_desc *desc; + __u32 count, size; + + desc = bpf_prog_info_array_desc + i; + + /* kernel is too old to support this field */ + if (info_len < desc->array_offset + sizeof(__u32) || + info_len < desc->count_offset + sizeof(__u32) || + (desc->size_offset > 0 && info_len < desc->size_offset)) + include_array = false; + + if (!include_array) { + arrays &= ~(1UL << i); /* clear the bit */ + continue; + } + + count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + + data_len += count * size; + } + + /* step 3: allocate continuous memory */ + data_len = roundup(data_len, sizeof(__u64)); + info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len); + if (!info_linear) + return ERR_PTR(-ENOMEM); + + /* step 4: fill data to info_linear->info */ + info_linear->arrays = arrays; + memset(&info_linear->info, 0, sizeof(info)); + ptr = info_linear->data; + + for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { + struct bpf_prog_info_array_desc *desc; + __u32 count, size; + + if ((arrays & (1UL << i)) == 0) + continue; + + desc = bpf_prog_info_array_desc + i; + count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + bpf_prog_info_set_offset_u32(&info_linear->info, + desc->count_offset, count); + bpf_prog_info_set_offset_u32(&info_linear->info, + desc->size_offset, size); + bpf_prog_info_set_offset_u64(&info_linear->info, + desc->array_offset, + ptr_to_u64(ptr)); + ptr += count * size; + } + + /* step 5: call syscall again to get required arrays */ + err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len); + if (err) { + pr_debug("can't get prog info: %s", strerror(errno)); + free(info_linear); + return ERR_PTR(-EFAULT); + } + + /* step 6: verify the data */ + for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { + struct bpf_prog_info_array_desc *desc; + __u32 v1, v2; + + if ((arrays & (1UL << i)) == 0) + continue; + + desc = bpf_prog_info_array_desc + i; + v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + v2 = bpf_prog_info_read_offset_u32(&info_linear->info, + desc->count_offset); + if (v1 != v2) + pr_warning("%s: mismatch in element count\n", __func__); + + v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + v2 = bpf_prog_info_read_offset_u32(&info_linear->info, + desc->size_offset); + if (v1 != v2) + pr_warning("%s: mismatch in rec size\n", __func__); + } + + /* step 7: update info_len and data_len */ + info_linear->info_len = sizeof(struct bpf_prog_info); + info_linear->data_len = data_len; + + return info_linear; +} + +void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear) +{ + int i; + + for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { + struct bpf_prog_info_array_desc *desc; + __u64 addr, offs; + + if ((info_linear->arrays & (1UL << i)) == 0) + continue; + + desc = bpf_prog_info_array_desc + i; + addr = bpf_prog_info_read_offset_u64(&info_linear->info, + desc->array_offset); + offs = addr - ptr_to_u64(info_linear->data); + bpf_prog_info_set_offset_u64(&info_linear->info, + desc->array_offset, offs); + } +} + +void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear) +{ + int i; + + for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { + struct bpf_prog_info_array_desc *desc; + __u64 addr, offs; + + if ((info_linear->arrays & (1UL << i)) == 0) + continue; + + desc = bpf_prog_info_array_desc + i; + offs = bpf_prog_info_read_offset_u64(&info_linear->info, + desc->array_offset); + addr = offs + ptr_to_u64(info_linear->data); + bpf_prog_info_set_offset_u64(&info_linear->info, + desc->array_offset, addr); + } +} diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index aa1521a51687..c70785cc8ef5 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -378,6 +378,69 @@ LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex); LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, __u32 ifindex); +/* + * Get bpf_prog_info in continuous memory + * + * struct bpf_prog_info has multiple arrays. The user has option to choose + * arrays to fetch from kernel. The following APIs provide an uniform way to + * fetch these data. All arrays in bpf_prog_info are stored in a single + * continuous memory region. This makes it easy to store the info in a + * file. + * + * Before writing bpf_prog_info_linear to files, it is necessary to + * translate pointers in bpf_prog_info to offsets. Helper functions + * bpf_program__bpil_addr_to_offs() and bpf_program__bpil_offs_to_addr() + * are introduced to switch between pointers and offsets. + * + * Examples: + * # To fetch map_ids and prog_tags: + * __u64 arrays = (1UL << BPF_PROG_INFO_MAP_IDS) | + * (1UL << BPF_PROG_INFO_PROG_TAGS); + * struct bpf_prog_info_linear *info_linear = + * bpf_program__get_prog_info_linear(fd, arrays); + * + * # To save data in file + * bpf_program__bpil_addr_to_offs(info_linear); + * write(f, info_linear, sizeof(*info_linear) + info_linear->data_len); + * + * # To read data from file + * read(f, info_linear, ); + * bpf_program__bpil_offs_to_addr(info_linear); + */ +enum bpf_prog_info_array { + BPF_PROG_INFO_FIRST_ARRAY = 0, + BPF_PROG_INFO_JITED_INSNS = 0, + BPF_PROG_INFO_XLATED_INSNS, + BPF_PROG_INFO_MAP_IDS, + BPF_PROG_INFO_JITED_KSYMS, + BPF_PROG_INFO_JITED_FUNC_LENS, + BPF_PROG_INFO_FUNC_INFO, + BPF_PROG_INFO_LINE_INFO, + BPF_PROG_INFO_JITED_LINE_INFO, + BPF_PROG_INFO_PROG_TAGS, + BPF_PROG_INFO_LAST_ARRAY, +}; + +struct bpf_prog_info_linear { + /* size of struct bpf_prog_info, when the tool is compiled */ + __u32 info_len; + /* total bytes allocated for data, round up to 8 bytes */ + __u32 data_len; + /* which arrays are included in data */ + __u64 arrays; + struct bpf_prog_info info; + __u8 data[]; +}; + +LIBBPF_API struct bpf_prog_info_linear * +bpf_program__get_prog_info_linear(int fd, __u64 arrays); + +LIBBPF_API void +bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear); + +LIBBPF_API void +bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 778a26702a70..f3ce50500cf2 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -153,4 +153,7 @@ LIBBPF_0.0.2 { xsk_socket__delete; xsk_umem__fd; xsk_socket__fd; + bpf_program__get_prog_info_linear; + bpf_program__bpil_addr_to_offs; + bpf_program__bpil_offs_to_addr; } LIBBPF_0.0.1; -- cgit From cae73f2339231d61022769f09c94e4500e8ad47a Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 11 Mar 2019 22:30:39 -0700 Subject: bpftool: use bpf_program__get_prog_info_linear() in prog.c:do_dump() This patches uses bpf_program__get_prog_info_linear() to simplify the logic in prog.c do_dump(). Committer testing: Before: # bpftool prog dump xlated id 208 > /tmp/dump.xlated.before # bpftool prog dump jited id 208 > /tmp/dump.jited.before # bpftool map dump id 107 > /tmp/map.dump.before After: # ~acme/git/perf/tools/bpf/bpftool/bpftool map dump id 107 > /tmp/map.dump.after # ~acme/git/perf/tools/bpf/bpftool/bpftool prog dump xlated id 208 > /tmp/dump.xlated.after # ~acme/git/perf/tools/bpf/bpftool/bpftool prog dump jited id 208 > /tmp/dump.jited.after # diff -u /tmp/dump.xlated.before /tmp/dump.xlated.after # diff -u /tmp/dump.jited.before /tmp/dump.jited.after # diff -u /tmp/map.dump.before /tmp/map.dump.after # ~acme/git/perf/tools/bpf/bpftool/bpftool prog dump xlated id 208 0: (bf) r6 = r1 1: (85) call bpf_get_current_pid_tgid#80800 2: (63) *(u32 *)(r10 -328) = r0 3: (bf) r2 = r10 4: (07) r2 += -328 5: (18) r1 = map[id:107] 7: (85) call __htab_map_lookup_elem#85680 8: (15) if r0 == 0x0 goto pc+1 9: (07) r0 += 56 10: (b7) r7 = 0 11: (55) if r0 != 0x0 goto pc+52 12: (bf) r1 = r10 13: (07) r1 += -328 14: (b7) r2 = 64 15: (bf) r3 = r6 16: (85) call bpf_probe_read#-46848 17: (bf) r2 = r10 18: (07) r2 += -320 19: (18) r1 = map[id:106] 21: (07) r1 += 208 22: (61) r0 = *(u32 *)(r2 +0) 23: (35) if r0 >= 0x200 goto pc+3 24: (67) r0 <<= 3 25: (0f) r0 += r1 26: (05) goto pc+1 27: (b7) r0 = 0 28: (15) if r0 == 0x0 goto pc+35 29: (71) r1 = *(u8 *)(r0 +0) 30: (15) if r1 == 0x0 goto pc+33 31: (b7) r5 = 64 32: (79) r1 = *(u64 *)(r10 -320) 33: (15) if r1 == 0x2 goto pc+2 34: (15) if r1 == 0x101 goto pc+3 35: (55) if r1 != 0x15 goto pc+19 36: (79) r3 = *(u64 *)(r6 +16) 37: (05) goto pc+1 38: (79) r3 = *(u64 *)(r6 +24) 39: (15) if r3 == 0x0 goto pc+15 40: (b7) r1 = 0 41: (63) *(u32 *)(r10 -260) = r1 42: (bf) r1 = r10 43: (07) r1 += -256 44: (b7) r2 = 256 45: (85) call bpf_probe_read_str#-46704 46: (b7) r5 = 328 47: (63) *(u32 *)(r10 -264) = r0 48: (bf) r1 = r0 49: (67) r1 <<= 32 50: (77) r1 >>= 32 51: (25) if r1 > 0xff goto pc+3 52: (07) r0 += 72 53: (57) r0 &= 255 54: (bf) r5 = r0 55: (bf) r4 = r10 56: (07) r4 += -328 57: (bf) r1 = r6 58: (18) r2 = map[id:105] 60: (18) r3 = 0xffffffff 62: (85) call bpf_perf_event_output_tp#-45104 63: (bf) r7 = r0 64: (bf) r0 = r7 65: (95) exit # Signed-off-by: Song Liu Reviewed-by: Jiri Olsa Acked-by: Daniel Borkmann Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: kernel-team@fb.com Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stanislav Fomichev Link: http://lkml.kernel.org/r/20190312053051.2690567-4-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/bpf/bpftool/prog.c | 266 +++++++++++------------------------------------ 1 file changed, 59 insertions(+), 207 deletions(-) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 8ef80d65a474..d2be5a06c339 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -401,41 +401,31 @@ static int do_show(int argc, char **argv) static int do_dump(int argc, char **argv) { - unsigned int finfo_rec_size, linfo_rec_size, jited_linfo_rec_size; - void *func_info = NULL, *linfo = NULL, *jited_linfo = NULL; - unsigned int nr_finfo, nr_linfo = 0, nr_jited_linfo = 0; + struct bpf_prog_info_linear *info_linear; struct bpf_prog_linfo *prog_linfo = NULL; - unsigned long *func_ksyms = NULL; - struct bpf_prog_info info = {}; - unsigned int *func_lens = NULL; + enum {DUMP_JITED, DUMP_XLATED} mode; const char *disasm_opt = NULL; - unsigned int nr_func_ksyms; - unsigned int nr_func_lens; + struct bpf_prog_info *info; struct dump_data dd = {}; - __u32 len = sizeof(info); + void *func_info = NULL; struct btf *btf = NULL; - unsigned int buf_size; char *filepath = NULL; bool opcodes = false; bool visual = false; char func_sig[1024]; unsigned char *buf; bool linum = false; - __u32 *member_len; - __u64 *member_ptr; + __u32 member_len; + __u64 arrays; ssize_t n; - int err; int fd; if (is_prefix(*argv, "jited")) { if (disasm_init()) return -1; - - member_len = &info.jited_prog_len; - member_ptr = &info.jited_prog_insns; + mode = DUMP_JITED; } else if (is_prefix(*argv, "xlated")) { - member_len = &info.xlated_prog_len; - member_ptr = &info.xlated_prog_insns; + mode = DUMP_XLATED; } else { p_err("expected 'xlated' or 'jited', got: %s", *argv); return -1; @@ -474,175 +464,50 @@ static int do_dump(int argc, char **argv) return -1; } - err = bpf_obj_get_info_by_fd(fd, &info, &len); - if (err) { - p_err("can't get prog info: %s", strerror(errno)); - return -1; - } - - if (!*member_len) { - p_info("no instructions returned"); - close(fd); - return 0; - } + if (mode == DUMP_JITED) + arrays = 1UL << BPF_PROG_INFO_JITED_INSNS; + else + arrays = 1UL << BPF_PROG_INFO_XLATED_INSNS; - buf_size = *member_len; + arrays |= 1UL << BPF_PROG_INFO_JITED_KSYMS; + arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; + arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; + arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; + arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; - buf = malloc(buf_size); - if (!buf) { - p_err("mem alloc failed"); - close(fd); + info_linear = bpf_program__get_prog_info_linear(fd, arrays); + close(fd); + if (IS_ERR_OR_NULL(info_linear)) { + p_err("can't get prog info: %s", strerror(errno)); return -1; } - nr_func_ksyms = info.nr_jited_ksyms; - if (nr_func_ksyms) { - func_ksyms = malloc(nr_func_ksyms * sizeof(__u64)); - if (!func_ksyms) { - p_err("mem alloc failed"); - close(fd); - goto err_free; - } - } - - nr_func_lens = info.nr_jited_func_lens; - if (nr_func_lens) { - func_lens = malloc(nr_func_lens * sizeof(__u32)); - if (!func_lens) { - p_err("mem alloc failed"); - close(fd); + info = &info_linear->info; + if (mode == DUMP_JITED) { + if (info->jited_prog_len == 0) { + p_info("no instructions returned"); goto err_free; } - } - - nr_finfo = info.nr_func_info; - finfo_rec_size = info.func_info_rec_size; - if (nr_finfo && finfo_rec_size) { - func_info = malloc(nr_finfo * finfo_rec_size); - if (!func_info) { - p_err("mem alloc failed"); - close(fd); + buf = (unsigned char *)(info->jited_prog_insns); + member_len = info->jited_prog_len; + } else { /* DUMP_XLATED */ + if (info->xlated_prog_len == 0) { + p_err("error retrieving insn dump: kernel.kptr_restrict set?"); goto err_free; } + buf = (unsigned char *)info->xlated_prog_insns; + member_len = info->xlated_prog_len; } - linfo_rec_size = info.line_info_rec_size; - if (info.nr_line_info && linfo_rec_size && info.btf_id) { - nr_linfo = info.nr_line_info; - linfo = malloc(nr_linfo * linfo_rec_size); - if (!linfo) { - p_err("mem alloc failed"); - close(fd); - goto err_free; - } - } - - jited_linfo_rec_size = info.jited_line_info_rec_size; - if (info.nr_jited_line_info && - jited_linfo_rec_size && - info.nr_jited_ksyms && - info.nr_jited_func_lens && - info.btf_id) { - nr_jited_linfo = info.nr_jited_line_info; - jited_linfo = malloc(nr_jited_linfo * jited_linfo_rec_size); - if (!jited_linfo) { - p_err("mem alloc failed"); - close(fd); - goto err_free; - } - } - - memset(&info, 0, sizeof(info)); - - *member_ptr = ptr_to_u64(buf); - *member_len = buf_size; - info.jited_ksyms = ptr_to_u64(func_ksyms); - info.nr_jited_ksyms = nr_func_ksyms; - info.jited_func_lens = ptr_to_u64(func_lens); - info.nr_jited_func_lens = nr_func_lens; - info.nr_func_info = nr_finfo; - info.func_info_rec_size = finfo_rec_size; - info.func_info = ptr_to_u64(func_info); - info.nr_line_info = nr_linfo; - info.line_info_rec_size = linfo_rec_size; - info.line_info = ptr_to_u64(linfo); - info.nr_jited_line_info = nr_jited_linfo; - info.jited_line_info_rec_size = jited_linfo_rec_size; - info.jited_line_info = ptr_to_u64(jited_linfo); - - err = bpf_obj_get_info_by_fd(fd, &info, &len); - close(fd); - if (err) { - p_err("can't get prog info: %s", strerror(errno)); - goto err_free; - } - - if (*member_len > buf_size) { - p_err("too many instructions returned"); - goto err_free; - } - - if (info.nr_jited_ksyms > nr_func_ksyms) { - p_err("too many addresses returned"); - goto err_free; - } - - if (info.nr_jited_func_lens > nr_func_lens) { - p_err("too many values returned"); - goto err_free; - } - - if (info.nr_func_info != nr_finfo) { - p_err("incorrect nr_func_info %d vs. expected %d", - info.nr_func_info, nr_finfo); - goto err_free; - } - - if (info.func_info_rec_size != finfo_rec_size) { - p_err("incorrect func_info_rec_size %d vs. expected %d", - info.func_info_rec_size, finfo_rec_size); - goto err_free; - } - - if (linfo && info.nr_line_info != nr_linfo) { - p_err("incorrect nr_line_info %u vs. expected %u", - info.nr_line_info, nr_linfo); - goto err_free; - } - - if (info.line_info_rec_size != linfo_rec_size) { - p_err("incorrect line_info_rec_size %u vs. expected %u", - info.line_info_rec_size, linfo_rec_size); - goto err_free; - } - - if (jited_linfo && info.nr_jited_line_info != nr_jited_linfo) { - p_err("incorrect nr_jited_line_info %u vs. expected %u", - info.nr_jited_line_info, nr_jited_linfo); - goto err_free; - } - - if (info.jited_line_info_rec_size != jited_linfo_rec_size) { - p_err("incorrect jited_line_info_rec_size %u vs. expected %u", - info.jited_line_info_rec_size, jited_linfo_rec_size); - goto err_free; - } - - if ((member_len == &info.jited_prog_len && - info.jited_prog_insns == 0) || - (member_len == &info.xlated_prog_len && - info.xlated_prog_insns == 0)) { - p_err("error retrieving insn dump: kernel.kptr_restrict set?"); - goto err_free; - } - - if (info.btf_id && btf__get_from_id(info.btf_id, &btf)) { + if (info->btf_id && btf__get_from_id(info->btf_id, &btf)) { p_err("failed to get btf"); goto err_free; } - if (nr_linfo) { - prog_linfo = bpf_prog_linfo__new(&info); + func_info = (void *)info->func_info; + + if (info->nr_line_info) { + prog_linfo = bpf_prog_linfo__new(info); if (!prog_linfo) p_info("error in processing bpf_line_info. continue without it."); } @@ -655,9 +520,9 @@ static int do_dump(int argc, char **argv) goto err_free; } - n = write(fd, buf, *member_len); + n = write(fd, buf, member_len); close(fd); - if (n != *member_len) { + if (n != member_len) { p_err("error writing output file: %s", n < 0 ? strerror(errno) : "short write"); goto err_free; @@ -665,19 +530,19 @@ static int do_dump(int argc, char **argv) if (json_output) jsonw_null(json_wtr); - } else if (member_len == &info.jited_prog_len) { + } else if (mode == DUMP_JITED) { const char *name = NULL; - if (info.ifindex) { - name = ifindex_to_bfd_params(info.ifindex, - info.netns_dev, - info.netns_ino, + if (info->ifindex) { + name = ifindex_to_bfd_params(info->ifindex, + info->netns_dev, + info->netns_ino, &disasm_opt); if (!name) goto err_free; } - if (info.nr_jited_func_lens && info.jited_func_lens) { + if (info->nr_jited_func_lens && info->jited_func_lens) { struct kernel_sym *sym = NULL; struct bpf_func_info *record; char sym_name[SYM_MAX_NAME]; @@ -685,17 +550,16 @@ static int do_dump(int argc, char **argv) __u64 *ksyms = NULL; __u32 *lens; __u32 i; - - if (info.nr_jited_ksyms) { + if (info->nr_jited_ksyms) { kernel_syms_load(&dd); - ksyms = (__u64 *) info.jited_ksyms; + ksyms = (__u64 *) info->jited_ksyms; } if (json_output) jsonw_start_array(json_wtr); - lens = (__u32 *) info.jited_func_lens; - for (i = 0; i < info.nr_jited_func_lens; i++) { + lens = (__u32 *) info->jited_func_lens; + for (i = 0; i < info->nr_jited_func_lens; i++) { if (ksyms) { sym = kernel_syms_search(&dd, ksyms[i]); if (sym) @@ -707,7 +571,7 @@ static int do_dump(int argc, char **argv) } if (func_info) { - record = func_info + i * finfo_rec_size; + record = func_info + i * info->func_info_rec_size; btf_dumper_type_only(btf, record->type_id, func_sig, sizeof(func_sig)); @@ -744,49 +608,37 @@ static int do_dump(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); } else { - disasm_print_insn(buf, *member_len, opcodes, name, + disasm_print_insn(buf, member_len, opcodes, name, disasm_opt, btf, NULL, 0, 0, false); } } else if (visual) { if (json_output) jsonw_null(json_wtr); else - dump_xlated_cfg(buf, *member_len); + dump_xlated_cfg(buf, member_len); } else { kernel_syms_load(&dd); - dd.nr_jited_ksyms = info.nr_jited_ksyms; - dd.jited_ksyms = (__u64 *) info.jited_ksyms; + dd.nr_jited_ksyms = info->nr_jited_ksyms; + dd.jited_ksyms = (__u64 *) info->jited_ksyms; dd.btf = btf; dd.func_info = func_info; - dd.finfo_rec_size = finfo_rec_size; + dd.finfo_rec_size = info->func_info_rec_size; dd.prog_linfo = prog_linfo; if (json_output) - dump_xlated_json(&dd, buf, *member_len, opcodes, + dump_xlated_json(&dd, buf, member_len, opcodes, linum); else - dump_xlated_plain(&dd, buf, *member_len, opcodes, + dump_xlated_plain(&dd, buf, member_len, opcodes, linum); kernel_syms_destroy(&dd); } - free(buf); - free(func_ksyms); - free(func_lens); - free(func_info); - free(linfo); - free(jited_linfo); - bpf_prog_linfo__free(prog_linfo); + free(info_linear); return 0; err_free: - free(buf); - free(func_ksyms); - free(func_lens); - free(func_info); - free(linfo); - free(jited_linfo); - bpf_prog_linfo__free(prog_linfo); + free(info_linear); return -1; } -- cgit From a742258af131e570a68ad8cf16cd2cc4692675a0 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 11 Mar 2019 22:30:40 -0700 Subject: perf bpf: Synthesize bpf events with bpf_program__get_prog_info_linear() With bpf_program__get_prog_info_linear, we can simplify the logic that synthesizes bpf events. This patch doesn't change the behavior of the code. Commiter notes: Needed this (for all four variables), suggested by Song, to overcome build failure on debian experimental cross building to MIPS 32-bit: - u8 (*prog_tags)[BPF_TAG_SIZE] = (void *)(info->prog_tags); + u8 (*prog_tags)[BPF_TAG_SIZE] = (void *)(uintptr_t)(info->prog_tags); util/bpf-event.c: In function 'perf_event__synthesize_one_bpf_prog': util/bpf-event.c:143:35: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] u8 (*prog_tags)[BPF_TAG_SIZE] = (void *)(info->prog_tags); ^ util/bpf-event.c:144:22: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] __u32 *prog_lens = (__u32 *)(info->jited_func_lens); ^ util/bpf-event.c:145:23: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] __u64 *prog_addrs = (__u64 *)(info->jited_ksyms); ^ util/bpf-event.c:146:22: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] void *func_infos = (void *)(info->func_info); ^ cc1: all warnings being treated as errors Signed-off-by: Song Liu Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: kernel-team@fb.com Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stanislav Fomichev Link: http://lkml.kernel.org/r/20190312053051.2690567-5-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-event.c | 118 +++++++++++++++----------------------------- 1 file changed, 40 insertions(+), 78 deletions(-) diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index ea012b735a37..e0cbe7f87170 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -3,7 +3,9 @@ #include #include #include +#include #include +#include #include "bpf-event.h" #include "debug.h" #include "symbol.h" @@ -49,99 +51,62 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool, { struct ksymbol_event *ksymbol_event = &event->ksymbol_event; struct bpf_event *bpf_event = &event->bpf_event; - u32 sub_prog_cnt, i, func_info_rec_size = 0; - u8 (*prog_tags)[BPF_TAG_SIZE] = NULL; - struct bpf_prog_info info = { .type = 0, }; - u32 info_len = sizeof(info); - void *func_infos = NULL; - u64 *prog_addrs = NULL; + struct bpf_prog_info_linear *info_linear; + struct bpf_prog_info *info; struct btf *btf = NULL; - u32 *prog_lens = NULL; bool has_btf = false; - char errbuf[512]; + u32 sub_prog_cnt, i; int err = 0; + u64 arrays; - /* Call bpf_obj_get_info_by_fd() to get sizes of arrays */ - err = bpf_obj_get_info_by_fd(fd, &info, &info_len); + arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS; + arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; + arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; + arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS; - if (err) { - pr_debug("%s: failed to get BPF program info: %s, aborting\n", - __func__, str_error_r(errno, errbuf, sizeof(errbuf))); + info_linear = bpf_program__get_prog_info_linear(fd, arrays); + if (IS_ERR_OR_NULL(info_linear)) { + info_linear = NULL; + pr_debug("%s: failed to get BPF program info. aborting\n", __func__); return -1; } - if (info_len < offsetof(struct bpf_prog_info, prog_tags)) { + + if (info_linear->info_len < offsetof(struct bpf_prog_info, prog_tags)) { pr_debug("%s: the kernel is too old, aborting\n", __func__); return -2; } + info = &info_linear->info; + /* number of ksyms, func_lengths, and tags should match */ - sub_prog_cnt = info.nr_jited_ksyms; - if (sub_prog_cnt != info.nr_prog_tags || - sub_prog_cnt != info.nr_jited_func_lens) + sub_prog_cnt = info->nr_jited_ksyms; + if (sub_prog_cnt != info->nr_prog_tags || + sub_prog_cnt != info->nr_jited_func_lens) return -1; /* check BTF func info support */ - if (info.btf_id && info.nr_func_info && info.func_info_rec_size) { + if (info->btf_id && info->nr_func_info && info->func_info_rec_size) { /* btf func info number should be same as sub_prog_cnt */ - if (sub_prog_cnt != info.nr_func_info) { + if (sub_prog_cnt != info->nr_func_info) { pr_debug("%s: mismatch in BPF sub program count and BTF function info count, aborting\n", __func__); - return -1; - } - if (btf__get_from_id(info.btf_id, &btf)) { - pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info.btf_id); - return -1; + err = -1; + goto out; } - func_info_rec_size = info.func_info_rec_size; - func_infos = calloc(sub_prog_cnt, func_info_rec_size); - if (!func_infos) { - pr_debug("%s: failed to allocate memory for func_infos, aborting\n", __func__); - return -1; + if (btf__get_from_id(info->btf_id, &btf)) { + pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info->btf_id); + err = -1; + btf = NULL; + goto out; } has_btf = true; } - /* - * We need address, length, and tag for each sub program. - * Allocate memory and call bpf_obj_get_info_by_fd() again - */ - prog_addrs = calloc(sub_prog_cnt, sizeof(u64)); - if (!prog_addrs) { - pr_debug("%s: failed to allocate memory for prog_addrs, aborting\n", __func__); - goto out; - } - prog_lens = calloc(sub_prog_cnt, sizeof(u32)); - if (!prog_lens) { - pr_debug("%s: failed to allocate memory for prog_lens, aborting\n", __func__); - goto out; - } - prog_tags = calloc(sub_prog_cnt, BPF_TAG_SIZE); - if (!prog_tags) { - pr_debug("%s: failed to allocate memory for prog_tags, aborting\n", __func__); - goto out; - } - - memset(&info, 0, sizeof(info)); - info.nr_jited_ksyms = sub_prog_cnt; - info.nr_jited_func_lens = sub_prog_cnt; - info.nr_prog_tags = sub_prog_cnt; - info.jited_ksyms = ptr_to_u64(prog_addrs); - info.jited_func_lens = ptr_to_u64(prog_lens); - info.prog_tags = ptr_to_u64(prog_tags); - info_len = sizeof(info); - if (has_btf) { - info.nr_func_info = sub_prog_cnt; - info.func_info_rec_size = func_info_rec_size; - info.func_info = ptr_to_u64(func_infos); - } - - err = bpf_obj_get_info_by_fd(fd, &info, &info_len); - if (err) { - pr_debug("%s: failed to get BPF program info, aborting\n", __func__); - goto out; - } - /* Synthesize PERF_RECORD_KSYMBOL */ for (i = 0; i < sub_prog_cnt; i++) { + u8 (*prog_tags)[BPF_TAG_SIZE] = (void *)(uintptr_t)(info->prog_tags); + __u32 *prog_lens = (__u32 *)(uintptr_t)(info->jited_func_lens); + __u64 *prog_addrs = (__u64 *)(uintptr_t)(info->jited_ksyms); + void *func_infos = (void *)(uintptr_t)(info->func_info); const struct bpf_func_info *finfo; const char *short_name = NULL; const struct btf_type *t; @@ -163,13 +128,13 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool, KSYM_NAME_LEN - name_len, prog_tags[i], BPF_TAG_SIZE); if (has_btf) { - finfo = func_infos + i * info.func_info_rec_size; + finfo = func_infos + i * info->func_info_rec_size; t = btf__type_by_id(btf, finfo->type_id); short_name = btf__name_by_offset(btf, t->name_off); } else if (i == 0 && sub_prog_cnt == 1) { /* no subprog */ - if (info.name[0]) - short_name = info.name; + if (info->name[0]) + short_name = info->name; } else short_name = "F"; if (short_name) @@ -195,9 +160,9 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool, }, .type = PERF_BPF_EVENT_PROG_LOAD, .flags = 0, - .id = info.id, + .id = info->id, }; - memcpy(bpf_event->tag, prog_tags[i], BPF_TAG_SIZE); + memcpy(bpf_event->tag, info->tag, BPF_TAG_SIZE); memset((void *)event + event->header.size, 0, machine->id_hdr_size); event->header.size += machine->id_hdr_size; err = perf_tool__process_synth_event(tool, event, @@ -205,10 +170,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool, } out: - free(prog_tags); - free(prog_lens); - free(prog_addrs); - free(func_infos); + free(info_linear); free(btf); return err ? -1 : 0; } -- cgit From e5416950454fa79b7bdc86dac45661b97d887c97 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 11 Mar 2019 22:30:41 -0700 Subject: perf bpf: Make synthesize_bpf_events() receive perf_session pointer instead of perf_tool This patch changes the arguments of perf_event__synthesize_bpf_events() to include perf_session* instead of perf_tool*. perf_session will be used in the next patch. Signed-off-by: Song Liu Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stanislav Fomichev Cc: kernel-team@fb.com Link: http://lkml.kernel.org/r/20190312053051.2690567-6-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/util/bpf-event.c | 8 +++++--- tools/perf/util/bpf-event.h | 4 ++-- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index f29874192d3e..e79faccd7842 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1114,7 +1114,7 @@ static int record__synthesize(struct record *rec, bool tail) return err; } - err = perf_event__synthesize_bpf_events(tool, process_synthesized_event, + err = perf_event__synthesize_bpf_events(session, process_synthesized_event, machine, opts); if (err < 0) pr_warning("Couldn't synthesize bpf events.\n"); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 2508a7a552fa..77e6190211d2 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1208,7 +1208,7 @@ static int __cmd_top(struct perf_top *top) init_process_thread(top); - ret = perf_event__synthesize_bpf_events(&top->tool, perf_event__process, + ret = perf_event__synthesize_bpf_events(top->session, perf_event__process, &top->session->machines.host, &top->record_opts); if (ret < 0) diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index e0cbe7f87170..5237e8f11997 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -10,6 +10,7 @@ #include "debug.h" #include "symbol.h" #include "machine.h" +#include "session.h" #define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) @@ -42,7 +43,7 @@ int machine__process_bpf_event(struct machine *machine __maybe_unused, * -1 for failures; * -2 for lack of kernel support. */ -static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool, +static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, perf_event__handler_t process, struct machine *machine, int fd, @@ -52,6 +53,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_tool *tool, struct ksymbol_event *ksymbol_event = &event->ksymbol_event; struct bpf_event *bpf_event = &event->bpf_event; struct bpf_prog_info_linear *info_linear; + struct perf_tool *tool = session->tool; struct bpf_prog_info *info; struct btf *btf = NULL; bool has_btf = false; @@ -175,7 +177,7 @@ out: return err ? -1 : 0; } -int perf_event__synthesize_bpf_events(struct perf_tool *tool, +int perf_event__synthesize_bpf_events(struct perf_session *session, perf_event__handler_t process, struct machine *machine, struct record_opts *opts) @@ -209,7 +211,7 @@ int perf_event__synthesize_bpf_events(struct perf_tool *tool, continue; } - err = perf_event__synthesize_one_bpf_prog(tool, process, + err = perf_event__synthesize_one_bpf_prog(session, process, machine, fd, event, opts); close(fd); diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index 7890067e1a37..6698683612a7 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -15,7 +15,7 @@ struct record_opts; int machine__process_bpf_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); -int perf_event__synthesize_bpf_events(struct perf_tool *tool, +int perf_event__synthesize_bpf_events(struct perf_session *session, perf_event__handler_t process, struct machine *machine, struct record_opts *opts); @@ -27,7 +27,7 @@ static inline int machine__process_bpf_event(struct machine *machine __maybe_unu return 0; } -static inline int perf_event__synthesize_bpf_events(struct perf_tool *tool __maybe_unused, +static inline int perf_event__synthesize_bpf_events(struct perf_session *session __maybe_unused, perf_event__handler_t process __maybe_unused, struct machine *machine __maybe_unused, struct record_opts *opts __maybe_unused) -- cgit From e4378f0cb90be0368c48baad69a99203c58e3196 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 11 Mar 2019 22:30:42 -0700 Subject: perf bpf: Save bpf_prog_info in a rbtree in perf_env bpf_prog_info contains information necessary to annotate bpf programs. This patch saves bpf_prog_info for bpf programs loaded in the system. Some big picture of the next few patches: To fully annotate BPF programs with source code mapping, 4 different informations are needed: 1) PERF_RECORD_KSYMBOL 2) PERF_RECORD_BPF_EVENT 3) bpf_prog_info 4) btf Before this set, 1) and 2) in the list are already saved to perf.data file. For BPF programs that are already loaded before perf run, 1) and 2) are synthesized by perf_event__synthesize_bpf_events(). For short living BPF programs, 1) and 2) are generated by kernel. This set handles 3) and 4) from the list. Again, it is necessary to handle existing BPF program and short living program separately. This patch handles 3) for exising BPF programs while synthesizing 1) and 2) in perf_event__synthesize_bpf_events(). These data are stored in perf_env. The next patch saves these data from perf_env to perf.data as headers. Similarly, the two patches after the next saves 4) of existing BPF programs to perf_env and perf.data. Another patch later will handle 3) and 4) for short living BPF programs by monitoring 1) and 2) in a dedicate thread. Signed-off-by: Song Liu Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stanislav Fomichev Cc: kernel-team@fb.com Link: http://lkml.kernel.org/r/20190312053051.2690567-7-songliubraving@fb.com [ set env->bpf_progs.infos_cnt to zero in perf_env__purge_bpf() as noted by jolsa ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf.c | 1 + tools/perf/util/bpf-event.c | 30 +++++++++++++++- tools/perf/util/bpf-event.h | 7 +++- tools/perf/util/env.c | 88 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/env.h | 19 ++++++++++ tools/perf/util/session.c | 1 + 6 files changed, 144 insertions(+), 2 deletions(-) diff --git a/tools/perf/perf.c b/tools/perf/perf.c index a11cb006f968..72df4b6fa36f 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -298,6 +298,7 @@ static int run_builtin(struct cmd_struct *p, int argc, const char **argv) use_pager = 1; commit_pager_choice(); + perf_env__init(&perf_env); perf_env__set_cmdline(&perf_env, argc, argv); status = p->fn(argc, argv); perf_config__exit(); diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 5237e8f11997..37ee4e2a728a 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -10,6 +10,7 @@ #include "debug.h" #include "symbol.h" #include "machine.h" +#include "env.h" #include "session.h" #define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) @@ -54,17 +55,28 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, struct bpf_event *bpf_event = &event->bpf_event; struct bpf_prog_info_linear *info_linear; struct perf_tool *tool = session->tool; + struct bpf_prog_info_node *info_node; struct bpf_prog_info *info; struct btf *btf = NULL; bool has_btf = false; + struct perf_env *env; u32 sub_prog_cnt, i; int err = 0; u64 arrays; + /* + * for perf-record and perf-report use header.env; + * otherwise, use global perf_env. + */ + env = session->data ? &session->header.env : &perf_env; + arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS; arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS; + arrays |= 1UL << BPF_PROG_INFO_JITED_INSNS; + arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; + arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; info_linear = bpf_program__get_prog_info_linear(fd, arrays); if (IS_ERR_OR_NULL(info_linear)) { @@ -153,8 +165,8 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, machine, process); } - /* Synthesize PERF_RECORD_BPF_EVENT */ if (!opts->no_bpf_event) { + /* Synthesize PERF_RECORD_BPF_EVENT */ *bpf_event = (struct bpf_event){ .header = { .type = PERF_RECORD_BPF_EVENT, @@ -167,6 +179,22 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, memcpy(bpf_event->tag, info->tag, BPF_TAG_SIZE); memset((void *)event + event->header.size, 0, machine->id_hdr_size); event->header.size += machine->id_hdr_size; + + /* save bpf_prog_info to env */ + info_node = malloc(sizeof(struct bpf_prog_info_node)); + if (!info_node) { + err = -1; + goto out; + } + + info_node->info_linear = info_linear; + perf_env__insert_bpf_prog_info(env, info_node); + info_linear = NULL; + + /* + * process after saving bpf_prog_info to env, so that + * required information is ready for look up + */ err = perf_tool__process_synth_event(tool, event, machine, process); } diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index 6698683612a7..fad932f7404f 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -3,14 +3,19 @@ #define __PERF_BPF_EVENT_H #include +#include #include "event.h" struct machine; union perf_event; struct perf_sample; -struct perf_tool; struct record_opts; +struct bpf_prog_info_node { + struct bpf_prog_info_linear *info_linear; + struct rb_node rb_node; +}; + #ifdef HAVE_LIBBPF_SUPPORT int machine__process_bpf_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 4c23779e271a..98cd36f0e317 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -3,15 +3,97 @@ #include "env.h" #include "sane_ctype.h" #include "util.h" +#include "bpf-event.h" #include #include +#include struct perf_env perf_env; +void perf_env__insert_bpf_prog_info(struct perf_env *env, + struct bpf_prog_info_node *info_node) +{ + __u32 prog_id = info_node->info_linear->info.id; + struct bpf_prog_info_node *node; + struct rb_node *parent = NULL; + struct rb_node **p; + + down_write(&env->bpf_progs.lock); + p = &env->bpf_progs.infos.rb_node; + + while (*p != NULL) { + parent = *p; + node = rb_entry(parent, struct bpf_prog_info_node, rb_node); + if (prog_id < node->info_linear->info.id) { + p = &(*p)->rb_left; + } else if (prog_id > node->info_linear->info.id) { + p = &(*p)->rb_right; + } else { + pr_debug("duplicated bpf prog info %u\n", prog_id); + goto out; + } + } + + rb_link_node(&info_node->rb_node, parent, p); + rb_insert_color(&info_node->rb_node, &env->bpf_progs.infos); + env->bpf_progs.infos_cnt++; +out: + up_write(&env->bpf_progs.lock); +} + +struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, + __u32 prog_id) +{ + struct bpf_prog_info_node *node = NULL; + struct rb_node *n; + + down_read(&env->bpf_progs.lock); + n = env->bpf_progs.infos.rb_node; + + while (n) { + node = rb_entry(n, struct bpf_prog_info_node, rb_node); + if (prog_id < node->info_linear->info.id) + n = n->rb_left; + else if (prog_id > node->info_linear->info.id) + n = n->rb_right; + else + break; + } + + up_read(&env->bpf_progs.lock); + return node; +} + +/* purge data in bpf_progs.infos tree */ +static void perf_env__purge_bpf(struct perf_env *env) +{ + struct rb_root *root; + struct rb_node *next; + + down_write(&env->bpf_progs.lock); + + root = &env->bpf_progs.infos; + next = rb_first(root); + + while (next) { + struct bpf_prog_info_node *node; + + node = rb_entry(next, struct bpf_prog_info_node, rb_node); + next = rb_next(&node->rb_node); + rb_erase(&node->rb_node, root); + free(node); + } + + env->bpf_progs.infos_cnt = 0; + + up_write(&env->bpf_progs.lock); +} + void perf_env__exit(struct perf_env *env) { int i; + perf_env__purge_bpf(env); zfree(&env->hostname); zfree(&env->os_release); zfree(&env->version); @@ -38,6 +120,12 @@ void perf_env__exit(struct perf_env *env) zfree(&env->memory_nodes); } +void perf_env__init(struct perf_env *env) +{ + env->bpf_progs.infos = RB_ROOT; + init_rwsem(&env->bpf_progs.lock); +} + int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]) { int i; diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index d01b8355f4ca..24b11c564ba4 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -3,7 +3,9 @@ #define __PERF_ENV_H #include +#include #include "cpumap.h" +#include "rwsem.h" struct cpu_topology_map { int socket_id; @@ -64,8 +66,20 @@ struct perf_env { struct memory_node *memory_nodes; unsigned long long memory_bsize; u64 clockid_res_ns; + + /* + * bpf_info_lock protects bpf rbtrees. This is needed because the + * trees are accessed by different threads in perf-top + */ + struct { + struct rw_semaphore lock; + struct rb_root infos; + u32 infos_cnt; + } bpf_progs; }; +struct bpf_prog_info_node; + extern struct perf_env perf_env; void perf_env__exit(struct perf_env *env); @@ -80,4 +94,9 @@ const char *perf_env__arch(struct perf_env *env); const char *perf_env__raw_arch(struct perf_env *env); int perf_env__nr_cpus_avail(struct perf_env *env); +void perf_env__init(struct perf_env *env); +void perf_env__insert_bpf_prog_info(struct perf_env *env, + struct bpf_prog_info_node *info_node); +struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, + __u32 prog_id); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 0ec34227bd60..b17f1c9bc965 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -132,6 +132,7 @@ struct perf_session *perf_session__new(struct perf_data *data, ordered_events__init(&session->ordered_events, ordered_events__deliver_event, NULL); + perf_env__init(&session->header.env); if (data) { if (perf_data__open(data)) goto out_delete; -- cgit From 606f972b1361f477cbd4e6e8ac00742fde4b39db Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 11 Mar 2019 22:30:43 -0700 Subject: perf bpf: Save bpf_prog_info information as headers to perf.data This patch enables perf-record to save bpf_prog_info information as headers to perf.data. A new header type HEADER_BPF_PROG_INFO is introduced for this data. Committer testing: As root, being on the kernel sources top level directory, run: # perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c -e *msg Just to compile and load a BPF program that attaches to the raw_syscalls:sys_{enter,exit} tracepoints to trace the syscalls ending in "msg" (recvmsg, sendmsg, recvmmsg, sendmmsg, etc). Then do a systemwide perf record session for a few seconds: # perf record -a sleep 2s Then look at: # perf report --header-only | grep -i bpf # bpf_prog_info of id 13 # bpf_prog_info of id 14 # bpf_prog_info of id 15 # bpf_prog_info of id 16 # bpf_prog_info of id 17 # bpf_prog_info of id 18 # bpf_prog_info of id 21 # bpf_prog_info of id 22 # bpf_prog_info of id 208 # bpf_prog_info of id 209 # We need to show more info about these programs, like bpftool does for the ones running on the system, i.e. 'perf record/perf report' become a way of saving the BPF state in a machine to then analyse on another, together with all the other information that is already saved in the perf.data header: # perf report --header-only # ======== # captured on : Tue Mar 12 11:42:13 2019 # header version : 1 # data offset : 296 # data size : 16294184 # feat offset : 16294480 # hostname : quaco # os release : 5.0.0+ # perf version : 5.0.gd783c8 # arch : x86_64 # nrcpus online : 8 # nrcpus avail : 8 # cpudesc : Intel(R) Core(TM) i7-8650U CPU @ 1.90GHz # cpuid : GenuineIntel,6,142,10 # total memory : 24555720 kB # cmdline : /home/acme/bin/perf (deleted) record -a # event : name = cycles:ppp, , id = { 3190123, 3190124, 3190125, 3190126, 3190127, 3190128, 3190129, 3190130 }, size = 112, { sample_period, sample_freq } = 4000, sample_type = IP|TID|TIME|CPU|PERIOD, read_format = ID, disabled = 1, inherit = 1, mmap = 1, comm = 1, freq = 1, task = 1, precise_ip = 3, sample_id_all = 1, exclude_guest = 1, mmap2 = 1, comm_exec = 1 # CPU_TOPOLOGY info available, use -I to display # NUMA_TOPOLOGY info available, use -I to display # pmu mappings: intel_pt = 8, software = 1, power = 11, uprobe = 7, uncore_imc = 12, cpu = 4, cstate_core = 18, uncore_cbox_2 = 15, breakpoint = 5, uncore_cbox_0 = 13, tracepoint = 2, cstate_pkg = 19, uncore_arb = 17, kprobe = 6, i915 = 10, msr = 9, uncore_cbox_3 = 16, uncore_cbox_1 = 14 # CACHE info available, use -I to display # time of first sample : 116392.441701 # time of last sample : 116400.932584 # sample duration : 8490.883 ms # MEM_TOPOLOGY info available, use -I to display # bpf_prog_info of id 13 # bpf_prog_info of id 14 # bpf_prog_info of id 15 # bpf_prog_info of id 16 # bpf_prog_info of id 17 # bpf_prog_info of id 18 # bpf_prog_info of id 21 # bpf_prog_info of id 22 # bpf_prog_info of id 208 # bpf_prog_info of id 209 # missing features: TRACING_DATA BRANCH_STACK GROUP_DESC AUXTRACE STAT CLOCKID DIR_FORMAT # ======== # Committer notes: We can't use the libbpf unconditionally, as the build may have been with NO_LIBBPF, when we end up with linking errors, so provide dummy {process,write}_bpf_prog_info() wrapped by HAVE_LIBBPF_SUPPORT for that case. Printing are not affected by this, so can continue as is. Signed-off-by: Song Liu Reviewed-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stanislav Fomichev Cc: kernel-team@fb.com Link: http://lkml.kernel.org/r/20190312053051.2690567-8-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 153 ++++++++++++++++++++++++++++++++++++++++++++++- tools/perf/util/header.h | 1 + 2 files changed, 153 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index b0683bf4d9f3..e6a81af516f6 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "evlist.h" #include "evsel.h" @@ -40,6 +41,7 @@ #include "time-utils.h" #include "units.h" #include "cputopo.h" +#include "bpf-event.h" #include "sane_ctype.h" @@ -876,6 +878,56 @@ static int write_dir_format(struct feat_fd *ff, return do_write(ff, &data->dir.version, sizeof(data->dir.version)); } +#ifdef HAVE_LIBBPF_SUPPORT +static int write_bpf_prog_info(struct feat_fd *ff, + struct perf_evlist *evlist __maybe_unused) +{ + struct perf_env *env = &ff->ph->env; + struct rb_root *root; + struct rb_node *next; + int ret; + + down_read(&env->bpf_progs.lock); + + ret = do_write(ff, &env->bpf_progs.infos_cnt, + sizeof(env->bpf_progs.infos_cnt)); + if (ret < 0) + goto out; + + root = &env->bpf_progs.infos; + next = rb_first(root); + while (next) { + struct bpf_prog_info_node *node; + size_t len; + + node = rb_entry(next, struct bpf_prog_info_node, rb_node); + next = rb_next(&node->rb_node); + len = sizeof(struct bpf_prog_info_linear) + + node->info_linear->data_len; + + /* before writing to file, translate address to offset */ + bpf_program__bpil_addr_to_offs(node->info_linear); + ret = do_write(ff, node->info_linear, len); + /* + * translate back to address even when do_write() fails, + * so that this function never changes the data. + */ + bpf_program__bpil_offs_to_addr(node->info_linear); + if (ret < 0) + goto out; + } +out: + up_read(&env->bpf_progs.lock); + return ret; +} +#else // HAVE_LIBBPF_SUPPORT +static int write_bpf_prog_info(struct feat_fd *ff __maybe_unused, + struct perf_evlist *evlist __maybe_unused) +{ + return 0; +} +#endif // HAVE_LIBBPF_SUPPORT + static int cpu_cache_level__sort(const void *a, const void *b) { struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a; @@ -1367,6 +1419,29 @@ static void print_dir_format(struct feat_fd *ff, FILE *fp) fprintf(fp, "# directory data version : %"PRIu64"\n", data->dir.version); } +static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp) +{ + struct perf_env *env = &ff->ph->env; + struct rb_root *root; + struct rb_node *next; + + down_read(&env->bpf_progs.lock); + + root = &env->bpf_progs.infos; + next = rb_first(root); + + while (next) { + struct bpf_prog_info_node *node; + + node = rb_entry(next, struct bpf_prog_info_node, rb_node); + next = rb_next(&node->rb_node); + fprintf(fp, "# bpf_prog_info of id %u\n", + node->info_linear->info.id); + } + + up_read(&env->bpf_progs.lock); +} + static void free_event_desc(struct perf_evsel *events) { struct perf_evsel *evsel; @@ -2414,6 +2489,81 @@ static int process_dir_format(struct feat_fd *ff, return do_read_u64(ff, &data->dir.version); } +#ifdef HAVE_LIBBPF_SUPPORT +static int process_bpf_prog_info(struct feat_fd *ff, void *data __maybe_unused) +{ + struct bpf_prog_info_linear *info_linear; + struct bpf_prog_info_node *info_node; + struct perf_env *env = &ff->ph->env; + u32 count, i; + int err = -1; + + if (ff->ph->needs_swap) { + pr_warning("interpreting bpf_prog_info from systems with endianity is not yet supported\n"); + return 0; + } + + if (do_read_u32(ff, &count)) + return -1; + + down_write(&env->bpf_progs.lock); + + for (i = 0; i < count; ++i) { + u32 info_len, data_len; + + info_linear = NULL; + info_node = NULL; + if (do_read_u32(ff, &info_len)) + goto out; + if (do_read_u32(ff, &data_len)) + goto out; + + if (info_len > sizeof(struct bpf_prog_info)) { + pr_warning("detected invalid bpf_prog_info\n"); + goto out; + } + + info_linear = malloc(sizeof(struct bpf_prog_info_linear) + + data_len); + if (!info_linear) + goto out; + info_linear->info_len = sizeof(struct bpf_prog_info); + info_linear->data_len = data_len; + if (do_read_u64(ff, (u64 *)(&info_linear->arrays))) + goto out; + if (__do_read(ff, &info_linear->info, info_len)) + goto out; + if (info_len < sizeof(struct bpf_prog_info)) + memset(((void *)(&info_linear->info)) + info_len, 0, + sizeof(struct bpf_prog_info) - info_len); + + if (__do_read(ff, info_linear->data, data_len)) + goto out; + + info_node = malloc(sizeof(struct bpf_prog_info_node)); + if (!info_node) + goto out; + + /* after reading from file, translate offset to address */ + bpf_program__bpil_offs_to_addr(info_linear); + info_node->info_linear = info_linear; + perf_env__insert_bpf_prog_info(env, info_node); + } + + return 0; +out: + free(info_linear); + free(info_node); + up_write(&env->bpf_progs.lock); + return err; +} +#else // HAVE_LIBBPF_SUPPORT +static int process_bpf_prog_info(struct feat_fd *ff __maybe_unused, void *data __maybe_unused) +{ + return 0; +} +#endif // HAVE_LIBBPF_SUPPORT + struct feature_ops { int (*write)(struct feat_fd *ff, struct perf_evlist *evlist); void (*print)(struct feat_fd *ff, FILE *fp); @@ -2474,7 +2624,8 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPR(SAMPLE_TIME, sample_time, false), FEAT_OPR(MEM_TOPOLOGY, mem_topology, true), FEAT_OPR(CLOCKID, clockid, false), - FEAT_OPN(DIR_FORMAT, dir_format, false) + FEAT_OPN(DIR_FORMAT, dir_format, false), + FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false) }; struct header_print_data { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 6a231340238d..1dc85f0f895d 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -40,6 +40,7 @@ enum { HEADER_MEM_TOPOLOGY, HEADER_CLOCKID, HEADER_DIR_FORMAT, + HEADER_BPF_PROG_INFO, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; -- cgit From 3792cb2ff43b1b193136a03ce1336462a827d792 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 11 Mar 2019 22:30:44 -0700 Subject: perf bpf: Save BTF in a rbtree in perf_env BTF contains information necessary to annotate BPF programs. This patch saves BTF for BPF programs loaded in the system. Signed-off-by: Song Liu Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stanislav Fomichev Cc: kernel-team@fb.com Link: http://lkml.kernel.org/r/20190312053051.2690567-9-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-event.c | 23 ++++++++++++++++ tools/perf/util/bpf-event.h | 7 +++++ tools/perf/util/env.c | 67 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/env.h | 5 ++++ 4 files changed, 102 insertions(+) diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 37ee4e2a728a..a4fc52b4ffae 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -34,6 +34,28 @@ int machine__process_bpf_event(struct machine *machine __maybe_unused, return 0; } +static int perf_env__fetch_btf(struct perf_env *env, + u32 btf_id, + struct btf *btf) +{ + struct btf_node *node; + u32 data_size; + const void *data; + + data = btf__get_raw_data(btf, &data_size); + + node = malloc(data_size + sizeof(struct btf_node)); + if (!node) + return -1; + + node->id = btf_id; + node->data_size = data_size; + memcpy(node->data, data, data_size); + + perf_env__insert_btf(env, node); + return 0; +} + /* * Synthesize PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT for one bpf * program. One PERF_RECORD_BPF_EVENT is generated for the program. And @@ -113,6 +135,7 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, goto out; } has_btf = true; + perf_env__fetch_btf(env, info->btf_id, btf); } /* Synthesize PERF_RECORD_KSYMBOL */ diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index fad932f7404f..b9ec394dc7c7 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -16,6 +16,13 @@ struct bpf_prog_info_node { struct rb_node rb_node; }; +struct btf_node { + struct rb_node rb_node; + u32 id; + u32 data_size; + char data[]; +}; + #ifdef HAVE_LIBBPF_SUPPORT int machine__process_bpf_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 98cd36f0e317..c6351b557bb0 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -64,6 +64,58 @@ struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, return node; } +void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node) +{ + struct rb_node *parent = NULL; + __u32 btf_id = btf_node->id; + struct btf_node *node; + struct rb_node **p; + + down_write(&env->bpf_progs.lock); + p = &env->bpf_progs.btfs.rb_node; + + while (*p != NULL) { + parent = *p; + node = rb_entry(parent, struct btf_node, rb_node); + if (btf_id < node->id) { + p = &(*p)->rb_left; + } else if (btf_id > node->id) { + p = &(*p)->rb_right; + } else { + pr_debug("duplicated btf %u\n", btf_id); + goto out; + } + } + + rb_link_node(&btf_node->rb_node, parent, p); + rb_insert_color(&btf_node->rb_node, &env->bpf_progs.btfs); + env->bpf_progs.btfs_cnt++; +out: + up_write(&env->bpf_progs.lock); +} + +struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id) +{ + struct btf_node *node = NULL; + struct rb_node *n; + + down_read(&env->bpf_progs.lock); + n = env->bpf_progs.btfs.rb_node; + + while (n) { + node = rb_entry(n, struct btf_node, rb_node); + if (btf_id < node->id) + n = n->rb_left; + else if (btf_id > node->id) + n = n->rb_right; + else + break; + } + + up_read(&env->bpf_progs.lock); + return node; +} + /* purge data in bpf_progs.infos tree */ static void perf_env__purge_bpf(struct perf_env *env) { @@ -86,6 +138,20 @@ static void perf_env__purge_bpf(struct perf_env *env) env->bpf_progs.infos_cnt = 0; + root = &env->bpf_progs.btfs; + next = rb_first(root); + + while (next) { + struct btf_node *node; + + node = rb_entry(next, struct btf_node, rb_node); + next = rb_next(&node->rb_node); + rb_erase(&node->rb_node, root); + free(node); + } + + env->bpf_progs.btfs_cnt = 0; + up_write(&env->bpf_progs.lock); } @@ -123,6 +189,7 @@ void perf_env__exit(struct perf_env *env) void perf_env__init(struct perf_env *env) { env->bpf_progs.infos = RB_ROOT; + env->bpf_progs.btfs = RB_ROOT; init_rwsem(&env->bpf_progs.lock); } diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 24b11c564ba4..4f8e2b485c01 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -75,10 +75,13 @@ struct perf_env { struct rw_semaphore lock; struct rb_root infos; u32 infos_cnt; + struct rb_root btfs; + u32 btfs_cnt; } bpf_progs; }; struct bpf_prog_info_node; +struct btf_node; extern struct perf_env perf_env; @@ -99,4 +102,6 @@ void perf_env__insert_bpf_prog_info(struct perf_env *env, struct bpf_prog_info_node *info_node); struct bpf_prog_info_node *perf_env__find_bpf_prog_info(struct perf_env *env, __u32 prog_id); +void perf_env__insert_btf(struct perf_env *env, struct btf_node *btf_node); +struct btf_node *perf_env__find_btf(struct perf_env *env, __u32 btf_id); #endif /* __PERF_ENV_H */ -- cgit From a70a1123174ab592c5fa8ecf09f9fad9b335b872 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 11 Mar 2019 22:30:45 -0700 Subject: perf bpf: Save BTF information as headers to perf.data This patch enables 'perf record' to save BTF information as headers to perf.data. A new header type HEADER_BPF_BTF is introduced for this data. Committer testing: As root, being on the kernel sources top level directory, run: # perf trace -e tools/perf/examples/bpf/augmented_raw_syscalls.c -e *msg Just to compile and load a BPF program that attaches to the raw_syscalls:sys_{enter,exit} tracepoints to trace the syscalls ending in "msg" (recvmsg, sendmsg, recvmmsg, sendmmsg, etc). Make sure you have a recent enough clang, say version 9, to get the BTF ELF sections needed for this testing: # clang --version | head -1 clang version 9.0.0 (https://git.llvm.org/git/clang.git/ 7906282d3afec5dfdc2b27943fd6c0309086c507) (https://git.llvm.org/git/llvm.git/ a1b5de1ff8ae8bc79dc8e86e1f82565229bd0500) # readelf -SW tools/perf/examples/bpf/augmented_raw_syscalls.o | grep BTF [22] .BTF PROGBITS 0000000000000000 000ede 000b0e 00 0 0 1 [23] .BTF.ext PROGBITS 0000000000000000 0019ec 0002a0 00 0 0 1 [24] .rel.BTF.ext REL 0000000000000000 002fa8 000270 10 30 23 8 Then do a systemwide perf record session for a few seconds: # perf record -a sleep 2s Then look at: # perf report --header-only | grep b[pt]f # event : name = cycles:ppp, , id = { 1116204, 1116205, 1116206, 1116207, 1116208, 1116209, 1116210, 1116211 }, size = 112, { sample_period, sample_freq } = 4000, sample_type = IP|TID|TIME|PERIOD, read_format = ID, disabled = 1, inherit = 1, mmap = 1, comm = 1, freq = 1, enable_on_exec = 1, task = 1, precise_ip = 3, sample_id_all = 1, exclude_guest = 1, mmap2 = 1, comm_exec = 1, ksymbol = 1, bpf_event = 1 # bpf_prog_info of id 13 # bpf_prog_info of id 14 # bpf_prog_info of id 15 # bpf_prog_info of id 16 # bpf_prog_info of id 17 # bpf_prog_info of id 18 # bpf_prog_info of id 21 # bpf_prog_info of id 22 # bpf_prog_info of id 51 # bpf_prog_info of id 52 # btf info of id 8 # We need to show more info about these BPF and BTF entries , but that can be done later. Signed-off-by: Song Liu Reviewed-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stanislav Fomichev Cc: kernel-team@fb.com Link: http://lkml.kernel.org/r/20190312053051.2690567-10-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/header.c | 101 ++++++++++++++++++++++++++++++++++++++++++++++- tools/perf/util/header.h | 1 + 2 files changed, 101 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index e6a81af516f6..01dda2f65d36 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -928,6 +928,39 @@ static int write_bpf_prog_info(struct feat_fd *ff __maybe_unused, } #endif // HAVE_LIBBPF_SUPPORT +static int write_bpf_btf(struct feat_fd *ff, + struct perf_evlist *evlist __maybe_unused) +{ + struct perf_env *env = &ff->ph->env; + struct rb_root *root; + struct rb_node *next; + int ret; + + down_read(&env->bpf_progs.lock); + + ret = do_write(ff, &env->bpf_progs.btfs_cnt, + sizeof(env->bpf_progs.btfs_cnt)); + + if (ret < 0) + goto out; + + root = &env->bpf_progs.btfs; + next = rb_first(root); + while (next) { + struct btf_node *node; + + node = rb_entry(next, struct btf_node, rb_node); + next = rb_next(&node->rb_node); + ret = do_write(ff, &node->id, + sizeof(u32) * 2 + node->data_size); + if (ret < 0) + goto out; + } +out: + up_read(&env->bpf_progs.lock); + return ret; +} + static int cpu_cache_level__sort(const void *a, const void *b) { struct cpu_cache_level *cache_a = (struct cpu_cache_level *)a; @@ -1442,6 +1475,28 @@ static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp) up_read(&env->bpf_progs.lock); } +static void print_bpf_btf(struct feat_fd *ff, FILE *fp) +{ + struct perf_env *env = &ff->ph->env; + struct rb_root *root; + struct rb_node *next; + + down_read(&env->bpf_progs.lock); + + root = &env->bpf_progs.btfs; + next = rb_first(root); + + while (next) { + struct btf_node *node; + + node = rb_entry(next, struct btf_node, rb_node); + next = rb_next(&node->rb_node); + fprintf(fp, "# btf info of id %u\n", node->id); + } + + up_read(&env->bpf_progs.lock); +} + static void free_event_desc(struct perf_evsel *events) { struct perf_evsel *evsel; @@ -2564,6 +2619,49 @@ static int process_bpf_prog_info(struct feat_fd *ff __maybe_unused, void *data _ } #endif // HAVE_LIBBPF_SUPPORT +static int process_bpf_btf(struct feat_fd *ff, void *data __maybe_unused) +{ + struct perf_env *env = &ff->ph->env; + u32 count, i; + + if (ff->ph->needs_swap) { + pr_warning("interpreting btf from systems with endianity is not yet supported\n"); + return 0; + } + + if (do_read_u32(ff, &count)) + return -1; + + down_write(&env->bpf_progs.lock); + + for (i = 0; i < count; ++i) { + struct btf_node *node; + u32 id, data_size; + + if (do_read_u32(ff, &id)) + return -1; + if (do_read_u32(ff, &data_size)) + return -1; + + node = malloc(sizeof(struct btf_node) + data_size); + if (!node) + return -1; + + node->id = id; + node->data_size = data_size; + + if (__do_read(ff, node->data, data_size)) { + free(node); + return -1; + } + + perf_env__insert_btf(env, node); + } + + up_write(&env->bpf_progs.lock); + return 0; +} + struct feature_ops { int (*write)(struct feat_fd *ff, struct perf_evlist *evlist); void (*print)(struct feat_fd *ff, FILE *fp); @@ -2625,7 +2723,8 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPR(MEM_TOPOLOGY, mem_topology, true), FEAT_OPR(CLOCKID, clockid, false), FEAT_OPN(DIR_FORMAT, dir_format, false), - FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false) + FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false), + FEAT_OPR(BPF_BTF, bpf_btf, false), }; struct header_print_data { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 1dc85f0f895d..386da49e1bfa 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -41,6 +41,7 @@ enum { HEADER_CLOCKID, HEADER_DIR_FORMAT, HEADER_BPF_PROG_INFO, + HEADER_BPF_BTF, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; -- cgit From ee7a112fbcc8edb4cf2f84ce5fcc2da7818fd4b8 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 11 Mar 2019 22:30:46 -0700 Subject: perf top: Add option --no-bpf-event This patch adds option --no-bpf-event to 'perf top', which is the same as the option of 'perf record'. The following patches will use this option. Committer testing: # perf top -vv 2> /tmp/perf_event_attr.out # cat /tmp/perf_event_attr.out ------------------------------------------------------------ perf_event_attr: size 112 { sample_period, sample_freq } 4000 sample_type IP|TID|TIME|CPU|PERIOD read_format ID disabled 1 inherit 1 mmap 1 comm 1 freq 1 task 1 precise_ip 3 sample_id_all 1 exclude_guest 1 mmap2 1 comm_exec 1 ksymbol 1 bpf_event 1 ------------------------------------------------------------ # After this patch: # perf top --no-bpf-event -vv 2> /tmp/perf_event_attr.out # cat /tmp/perf_event_attr.out ------------------------------------------------------------ perf_event_attr: size 112 { sample_period, sample_freq } 4000 sample_type IP|TID|TIME|CPU|PERIOD read_format ID disabled 1 inherit 1 mmap 1 comm 1 freq 1 task 1 precise_ip 3 sample_id_all 1 exclude_guest 1 mmap2 1 comm_exec 1 ksymbol 1 ------------------------------------------------------------ # Signed-off-by: Song Liu Tested-by: Arnaldo Carvalho de Melo Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stanislav Fomichev Cc: kernel-team@fb.com Link: http://lkml.kernel.org/r/20190312053051.2690567-11-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 77e6190211d2..c2ea22c4ea67 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1469,6 +1469,7 @@ int cmd_top(int argc, const char **argv) "Display raw encoding of assembly instructions (default)"), OPT_BOOLEAN(0, "demangle-kernel", &symbol_conf.demangle_kernel, "Enable kernel symbol demangling"), + OPT_BOOLEAN(0, "no-bpf-event", &top.record_opts.no_bpf_event, "do not record bpf events"), OPT_STRING(0, "objdump", &top.annotation_opts.objdump_path, "path", "objdump binary to use for disassembly and annotations"), OPT_STRING('M', "disassembler-style", &top.annotation_opts.disassembler_style, "disassembler style", -- cgit From 31be9478ed7f43d6351e0d5a2257ca76609c83d3 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 11 Mar 2019 22:30:47 -0700 Subject: perf feature detection: Add -lopcodes to feature-libbfd Both libbfd and libopcodes are distributed with binutil-dev/devel. When libbfd is present, it is OK to assume that libopcodes also present. This has been a safe assumption for bpftool. This patch adds -lopcodes to perf/Makefile.config. libopcodes will be used in the next commit for BPF annotation. Signed-off-by: Song Liu Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stanislav Fomichev Cc: kernel-team@fb.com Link: http://lkml.kernel.org/r/20190312053051.2690567-12-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 0f11d5891301..df4ad45599ca 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -713,7 +713,7 @@ else endif ifeq ($(feature-libbfd), 1) - EXTLIBS += -lbfd + EXTLIBS += -lbfd -lopcodes else # we are on a system that requires -liberty and (maybe) -lz # to link against -lbfd; test each case individually here @@ -724,10 +724,10 @@ else $(call feature_check,libbfd-liberty-z) ifeq ($(feature-libbfd-liberty), 1) - EXTLIBS += -lbfd -liberty + EXTLIBS += -lbfd -lopcodes -liberty else ifeq ($(feature-libbfd-liberty-z), 1) - EXTLIBS += -lbfd -liberty -lz + EXTLIBS += -lbfd -lopcodes -liberty -lz endif endif endif -- cgit From 9b86d04d53b98399017fea44e9047165ffe12d42 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 11 Mar 2019 22:30:48 -0700 Subject: perf symbols: Introduce DSO_BINARY_TYPE__BPF_PROG_INFO Introduce a new dso type DSO_BINARY_TYPE__BPF_PROG_INFO for BPF programs. In symbol__disassemble(), DSO_BINARY_TYPE__BPF_PROG_INFO dso will call into a new function symbol__disassemble_bpf() in an upcoming patch, where annotation line information is filled based bpf_prog_info and btf saved in given perf_env. Committer notes: Removed the unnamed union with 'bpf_prog' and 'cache' in 'struct dso', to fix this bug when exiting 'perf top': # perf top perf: Segmentation fault -------- backtrace -------- perf[0x5a785a] /lib64/libc.so.6(+0x385bf)[0x7fd68443c5bf] perf(rb_first+0x2b)[0x4d6eeb] perf(dso__delete+0xb7)[0x4dffb7] perf[0x4f9e37] perf(perf_session__delete+0x64)[0x504df4] perf(cmd_top+0x1957)[0x454467] perf[0x4aad18] perf(main+0x61c)[0x42ec7c] /lib64/libc.so.6(__libc_start_main+0xf2)[0x7fd684428412] perf(_start+0x2d)[0x42eead] # # addr2line -fe ~/bin/perf 0x4dffb7 dso_cache__free /home/acme/git/perf/tools/perf/util/dso.c:713 That is trying to access the dso->data.cache, and that is not used with BPF programs, so we end up accessing what is in bpf_prog.first_member, b00m. Signed-off-by: Song Liu Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stanislav Fomichev Cc: kernel-team@fb.com Link: http://lkml.kernel.org/r/20190312053051.2690567-13-songliubraving@fb.com [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 1 + tools/perf/util/dso.h | 8 ++++++++ tools/perf/util/symbol.c | 1 + 3 files changed, 10 insertions(+) diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index ab8a455d2283..e059976d9d93 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -184,6 +184,7 @@ int dso__read_binary_type_filename(const struct dso *dso, case DSO_BINARY_TYPE__KALLSYMS: case DSO_BINARY_TYPE__GUEST_KALLSYMS: case DSO_BINARY_TYPE__JAVA_JIT: + case DSO_BINARY_TYPE__BPF_PROG_INFO: case DSO_BINARY_TYPE__NOT_FOUND: ret = -1; break; diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index bb417c54c25a..6e3f63781e51 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -14,6 +14,7 @@ struct machine; struct map; +struct perf_env; enum dso_binary_type { DSO_BINARY_TYPE__KALLSYMS = 0, @@ -35,6 +36,7 @@ enum dso_binary_type { DSO_BINARY_TYPE__KCORE, DSO_BINARY_TYPE__GUEST_KCORE, DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO, + DSO_BINARY_TYPE__BPF_PROG_INFO, DSO_BINARY_TYPE__NOT_FOUND, }; @@ -189,6 +191,12 @@ struct dso { u64 debug_frame_offset; u64 eh_frame_hdr_offset; } data; + /* bpf prog information */ + struct { + u32 id; + u32 sub_id; + struct perf_env *env; + } bpf_prog; union { /* Tool specific area */ void *priv; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 58442ca5e3c4..5cbad55cd99d 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1455,6 +1455,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, case DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO: return true; + case DSO_BINARY_TYPE__BPF_PROG_INFO: case DSO_BINARY_TYPE__NOT_FOUND: default: return false; -- cgit From 3ca3877a9732b68cf0289367a859f6c163a79bfa Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 11 Mar 2019 22:30:49 -0700 Subject: perf bpf: Process PERF_BPF_EVENT_PROG_LOAD for annotation This patch adds processing of PERF_BPF_EVENT_PROG_LOAD, which sets proper DSO type/id/etc of memory regions mapped to BPF programs to DSO_BINARY_TYPE__BPF_PROG_INFO. Signed-off-by: Song Liu Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stanislav Fomichev Cc: kernel-team@fb.com Link: http://lkml.kernel.org/r/20190312053051.2690567-14-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-event.c | 54 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index a4fc52b4ffae..852e960692cb 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -12,6 +12,7 @@ #include "machine.h" #include "env.h" #include "session.h" +#include "map.h" #define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) @@ -25,12 +26,65 @@ static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len) return ret; } +static int machine__process_bpf_event_load(struct machine *machine, + union perf_event *event, + struct perf_sample *sample __maybe_unused) +{ + struct bpf_prog_info_linear *info_linear; + struct bpf_prog_info_node *info_node; + struct perf_env *env = machine->env; + int id = event->bpf_event.id; + unsigned int i; + + /* perf-record, no need to handle bpf-event */ + if (env == NULL) + return 0; + + info_node = perf_env__find_bpf_prog_info(env, id); + if (!info_node) + return 0; + info_linear = info_node->info_linear; + + for (i = 0; i < info_linear->info.nr_jited_ksyms; i++) { + u64 *addrs = (u64 *)(info_linear->info.jited_ksyms); + u64 addr = addrs[i]; + struct map *map; + + map = map_groups__find(&machine->kmaps, addr); + + if (map) { + map->dso->binary_type = DSO_BINARY_TYPE__BPF_PROG_INFO; + map->dso->bpf_prog.id = id; + map->dso->bpf_prog.sub_id = i; + map->dso->bpf_prog.env = env; + } + } + return 0; +} + int machine__process_bpf_event(struct machine *machine __maybe_unused, union perf_event *event, struct perf_sample *sample __maybe_unused) { if (dump_trace) perf_event__fprintf_bpf_event(event, stdout); + + switch (event->bpf_event.type) { + case PERF_BPF_EVENT_PROG_LOAD: + return machine__process_bpf_event_load(machine, event, sample); + + case PERF_BPF_EVENT_PROG_UNLOAD: + /* + * Do not free bpf_prog_info and btf of the program here, + * as annotation still need them. They will be freed at + * the end of the session. + */ + break; + default: + pr_debug("unexpected bpf_event type of %d\n", + event->bpf_event.type); + break; + } return 0; } -- cgit From 7442c483b963dbee7d1b655cbad99c727c047828 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 18 Mar 2019 17:35:11 +0100 Subject: mlxsw: core: mlxsw: core: avoid -Wint-in-bool-context warning A recently added function in mlxsw triggers a harmless compiler warning: In file included from drivers/net/ethernet/mellanox/mlxsw/core.h:17, from drivers/net/ethernet/mellanox/mlxsw/core_env.c:7: drivers/net/ethernet/mellanox/mlxsw/core_env.c: In function 'mlxsw_env_module_temp_thresholds_get': drivers/net/ethernet/mellanox/mlxsw/reg.h:8015:45: error: '*' in boolean context, suggest '&&' instead [-Werror=int-in-bool-context] #define MLXSW_REG_MTMP_TEMP_TO_MC(val) (val * 125) ~~~~~^~~~~~ drivers/net/ethernet/mellanox/mlxsw/core_env.c:116:8: note: in expansion of macro 'MLXSW_REG_MTMP_TEMP_TO_MC' if (!MLXSW_REG_MTMP_TEMP_TO_MC(module_temp)) { ^~~~~~~~~~~~~~~~~~~~~~~~~ The warning is normally disabled, but it would be nice to enable it to find real bugs, and there are no other known instances at the moment. Replace the negation with a zero-comparison, which also matches the comment above it. Fixes: d93c19a1d95c ("mlxsw: core: Add API for QSFP module temperature thresholds reading") Signed-off-by: Arnd Bergmann Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core_env.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c index 7a15e932ed2f..c1c1965d7acc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -113,7 +113,7 @@ int mlxsw_env_module_temp_thresholds_get(struct mlxsw_core *core, int module, return 0; default: /* Do not consider thresholds for zero temperature. */ - if (!MLXSW_REG_MTMP_TEMP_TO_MC(module_temp)) { + if (MLXSW_REG_MTMP_TEMP_TO_MC(module_temp) == 0) { *temp = 0; return 0; } -- cgit From 223a960c01227e4dbcb6f9fa06b47d73bda21274 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Mon, 18 Mar 2019 23:36:08 +0200 Subject: net: stmmac: fix memory corruption with large MTUs When using 16K DMA buffers and ring mode, the DES3 refill is not working correctly as the function is using a bogus pointer for checking the private data. As a result stale pointers will remain in the RX descriptor ring, so DMA will now likely overwrite/corrupt some already freed memory. As simple reproducer, just receive some UDP traffic: # ifconfig eth0 down; ifconfig eth0 mtu 9000; ifconfig eth0 up # iperf3 -c 192.168.253.40 -u -b 0 -R If you didn't crash by now check the RX descriptors to find non-contiguous RX buffers: cat /sys/kernel/debug/stmmaceth/eth0/descriptors_status [...] 1 [0x2be5020]: 0xa3220321 0x9ffc1ffc 0x72d70082 0x130e207e ^^^^^^^^^^^^^^^^^^^^^ 2 [0x2be5040]: 0xa3220321 0x9ffc1ffc 0x72998082 0x1311a07e ^^^^^^^^^^^^^^^^^^^^^ A simple ping test will now report bad data: # ping -s 8200 192.168.253.40 PING 192.168.253.40 (192.168.253.40) 8200(8228) bytes of data. 8208 bytes from 192.168.253.40: icmp_seq=1 ttl=64 time=1.00 ms wrong data byte #8144 should be 0xd0 but was 0x88 Fix the wrong pointer. Also we must refill DES3 only if the DMA buffer size is 16K. Fixes: 54139cf3bb33 ("net: stmmac: adding multiple buffers for rx") Signed-off-by: Aaro Koskinen Acked-by: Jose Abreu Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/ring_mode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c index f936166d8910..4d9bcb4d0378 100644 --- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c +++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c @@ -113,10 +113,11 @@ static unsigned int is_jumbo_frm(int len, int enh_desc) static void refill_desc3(void *priv_ptr, struct dma_desc *p) { - struct stmmac_priv *priv = (struct stmmac_priv *)priv_ptr; + struct stmmac_rx_queue *rx_q = priv_ptr; + struct stmmac_priv *priv = rx_q->priv_data; /* Fill DES3 in case of RING mode */ - if (priv->dma_buf_sz >= BUF_SIZE_8KiB) + if (priv->dma_buf_sz == BUF_SIZE_16KiB) p->des3 = cpu_to_le32(le32_to_cpu(p->des2) + BUF_SIZE_8KiB); } -- cgit From a3e23f719f5c4a38ffb3d30c8d7632a4ed8ccd9e Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 19 Mar 2019 10:16:53 +0800 Subject: net-sysfs: call dev_hold if kobject_init_and_add success In netdev_queue_add_kobject and rx_queue_add_kobject, if sysfs_create_group failed, kobject_put will call netdev_queue_release to decrease dev refcont, however dev_hold has not be called. So we will see this while unregistering dev: unregister_netdevice: waiting for bcsh0 to become free. Usage count = -1 Reported-by: Hulk Robot Fixes: d0d668371679 ("net: don't decrement kobj reference count on init failure") Signed-off-by: YueHaibing Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 4ff661f6f989..8f8b7b6c2945 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -928,6 +928,8 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) if (error) return error; + dev_hold(queue->dev); + if (dev->sysfs_rx_queue_group) { error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group); if (error) { @@ -937,7 +939,6 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) } kobject_uevent(kobj, KOBJ_ADD); - dev_hold(queue->dev); return error; } @@ -1464,6 +1465,8 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) if (error) return error; + dev_hold(queue->dev); + #ifdef CONFIG_BQL error = sysfs_create_group(kobj, &dql_group); if (error) { @@ -1473,7 +1476,6 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) #endif kobject_uevent(kobj, KOBJ_ADD); - dev_hold(queue->dev); return 0; } -- cgit From d7737d4257459ca8921ff911c88937be1a11ea9d Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Mon, 18 Mar 2019 22:19:44 -0500 Subject: nfc: Fix to check for kmemdup failure In case of kmemdup failure while setting the service name the patch returns -ENOMEM upstream for processing. Signed-off-by: Aditya Pakki Signed-off-by: David S. Miller --- net/nfc/llcp_sock.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index ae296273ce3d..17dcd0b5eb32 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -726,6 +726,10 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, llcp_sock->service_name = kmemdup(addr->service_name, llcp_sock->service_name_len, GFP_KERNEL); + if (!llcp_sock->service_name) { + ret = -ENOMEM; + goto sock_llcp_release; + } nfc_llcp_sock_link(&local->connecting_sockets, sk); @@ -745,10 +749,11 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, return ret; sock_unlink: - nfc_llcp_put_ssap(local, llcp_sock->ssap); - nfc_llcp_sock_unlink(&local->connecting_sockets, sk); +sock_llcp_release: + nfc_llcp_put_ssap(local, llcp_sock->ssap); + put_dev: nfc_put_device(dev); -- cgit From 89e4130939a20304f4059ab72179da81f5347528 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Mar 2019 05:45:35 -0700 Subject: tcp: do not use ipv6 header for ipv4 flow When a dual stack tcp listener accepts an ipv4 flow, it should not attempt to use an ipv6 header or tcp_v6_iif() helper. Fixes: 1397ed35f22d ("ipv6: add flowinfo for tcp6 pkt_options for all cases") Fixes: df3687ffc665 ("ipv6: add the IPV6_FL_F_REFLECT flag to IPV6_FL_A_GET") Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 57ef69a10889..44d431849d39 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1110,11 +1110,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; - newnp->mcast_oif = tcp_v6_iif(skb); - newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); + newnp->mcast_oif = inet_iif(skb); + newnp->mcast_hops = ip_hdr(skb)->ttl; + newnp->rcv_flowinfo = 0; if (np->repflow) - newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); + newnp->flow_label = 0; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count -- cgit From e0aa67709f89d08c8d8e5bdd9e0b649df61d0090 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Mar 2019 05:46:18 -0700 Subject: dccp: do not use ipv6 header for ipv4 flow When a dual stack dccp listener accepts an ipv4 flow, it should not attempt to use an ipv6 header or inet6_iif() helper. Fixes: 3df80d9320bc ("[DCCP]: Introduce DCCPv6") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/dccp/ipv6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index d5740bad5b18..57d84e9b7b6f 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -436,8 +436,8 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk, newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; - newnp->mcast_oif = inet6_iif(skb); - newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; + newnp->mcast_oif = inet_iif(skb); + newnp->mcast_hops = ip_hdr(skb)->ttl; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count -- cgit From 54e3aca84e571559915998aa6cc05e5ac37c043b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 18 Mar 2019 21:47:09 +0300 Subject: ACPI / utils: Drop reference in test for device presence When commit 8661423eea1a ("ACPI / utils: Add new acpi_dev_present helper") introduced acpi_dev_present(), it missed the fact that bus_find_device() took a reference on the device found by it and the callers of acpi_dev_present() don't drop that reference. Drop the reference on the device in acpi_dev_present(). Fixes: 8661423eea1a ("ACPI / utils: Add new acpi_dev_present helper") Signed-off-by: Andy Shevchenko Reviewed-by: Hans de Goede Signed-off-by: Rafael J. Wysocki --- drivers/acpi/utils.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index 78db97687f26..c4b06cc075f9 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -800,6 +800,7 @@ bool acpi_dev_present(const char *hid, const char *uid, s64 hrv) match.hrv = hrv; dev = bus_find_device(&acpi_bus_type, NULL, &match, acpi_dev_match_cb); + put_device(dev); return !!dev; } EXPORT_SYMBOL(acpi_dev_present); -- cgit From 2071ac985d37efe496782c34318dbead93beb02f Mon Sep 17 00:00:00 2001 From: Jiada Wang Date: Tue, 12 Mar 2019 15:51:28 +0900 Subject: PM / Domains: Avoid a potential deadlock Lockdep warns that prepare_lock and genpd->mlock can cause a deadlock the deadlock scenario is like following: First thread is probing cs2000 cs2000_probe() clk_register() __clk_core_init() clk_prepare_lock() ----> acquires prepare_lock cs2000_recalc_rate() i2c_smbus_read_byte_data() rcar_i2c_master_xfer() dma_request_chan() rcar_dmac_of_xlate() rcar_dmac_alloc_chan_resources() pm_runtime_get_sync() __pm_runtime_resume() rpm_resume() rpm_callback() genpd_runtime_resume() ----> acquires genpd->mlock Second thread is attaching any device to the same PM domain genpd_add_device() genpd_lock() ----> acquires genpd->mlock cpg_mssr_attach_dev() of_clk_get_from_provider() __of_clk_get_from_provider() __clk_create_clk() clk_prepare_lock() ----> acquires prepare_lock Since currently no PM provider access genpd's critical section in .attach_dev, and .detach_dev callbacks, so there is no need to protect these two callbacks with genpd->mlock. This patch avoids a potential deadlock by moving out .attach_dev and .detach_dev from genpd->mlock, so that genpd->mlock won't be held when prepare_lock is acquired in .attach_dev and .detach_dev Signed-off-by: Jiada Wang Reviewed-by: Ulf Hansson Tested-by: Geert Uytterhoeven Reviewed-by: Geert Uytterhoeven Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 76c9969b7124..96a6dc9d305c 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1469,12 +1469,12 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, if (IS_ERR(gpd_data)) return PTR_ERR(gpd_data); - genpd_lock(genpd); - ret = genpd->attach_dev ? genpd->attach_dev(genpd, dev) : 0; if (ret) goto out; + genpd_lock(genpd); + dev_pm_domain_set(dev, &genpd->domain); genpd->device_count++; @@ -1482,9 +1482,8 @@ static int genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, list_add_tail(&gpd_data->base.list_node, &genpd->dev_list); - out: genpd_unlock(genpd); - + out: if (ret) genpd_free_dev_data(dev, gpd_data); else @@ -1533,15 +1532,15 @@ static int genpd_remove_device(struct generic_pm_domain *genpd, genpd->device_count--; genpd->max_off_time_changed = true; - if (genpd->detach_dev) - genpd->detach_dev(genpd, dev); - dev_pm_domain_set(dev, NULL); list_del_init(&pdd->list_node); genpd_unlock(genpd); + if (genpd->detach_dev) + genpd->detach_dev(genpd, dev); + genpd_free_dev_data(dev, gpd_data); return 0; -- cgit From fb6fafbc7de4a813bb5364358bbe27f71e62b24a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 19 Mar 2019 22:15:58 +0100 Subject: 3c515: fix integer overflow warning clang points out a harmless signed integer overflow: drivers/net/ethernet/3com/3c515.c:1530:66: error: implicit conversion from 'int' to 'short' changes value from 32783 to -32753 [-Werror,-Wconstant-conversion] new_mode = SetRxFilter | RxStation | RxMulticast | RxBroadcast | RxProm; ~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~ drivers/net/ethernet/3com/3c515.c:1532:52: error: implicit conversion from 'int' to 'short' changes value from 32775 to -32761 [-Werror,-Wconstant-conversion] new_mode = SetRxFilter | RxStation | RxMulticast | RxBroadcast; ~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~ drivers/net/ethernet/3com/3c515.c:1534:38: error: implicit conversion from 'int' to 'short' changes value from 32773 to -32763 [-Werror,-Wconstant-conversion] new_mode = SetRxFilter | RxStation | RxBroadcast; ~ ~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~ Make the variable unsigned to avoid the overflow. Fixes: Linux-2.1.128pre1 Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/ethernet/3com/3c515.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c index 808abb6b3671..b15752267c8d 100644 --- a/drivers/net/ethernet/3com/3c515.c +++ b/drivers/net/ethernet/3com/3c515.c @@ -1521,7 +1521,7 @@ static void update_stats(int ioaddr, struct net_device *dev) static void set_rx_mode(struct net_device *dev) { int ioaddr = dev->base_addr; - short new_mode; + unsigned short new_mode; if (dev->flags & IFF_PROMISC) { if (corkscrew_debug > 3) -- cgit From f84532ce5887dac2ef67498b897a8713793eebde Mon Sep 17 00:00:00 2001 From: Vinay K Nallamothu Date: Tue, 19 Mar 2019 22:41:18 +0000 Subject: mpls: Fix 6PE forwarding This patch adds support for 6PE (RFC 4798) which uses IPv4-mapped IPv6 nexthop to connect IPv6 islands over IPv4 only MPLS network core. Prior to this fix, to find the link-layer destination mac address, 6PE enabled host/router was sending IPv6 ND requests for IPv4-mapped IPv6 nexthop address over the interface facing the IPv4 only core which wouldn't success as the core is IPv6 free. This fix changes that behavior on 6PE host to treat the nexthop as IPv4 address and send ARP requests whenever the next-hop address is an IPv4-mapped IPv6 address. Below topology illustrates the issue and how the patch addresses it. abcd::1.1.1.1 (lo) abcd::2.2.2.2 (lo) R0 (PE/host)------------------------R1--------------------------------R2 (PE/host) <--- IPv4 MPLS core ---> <------ IPv4 MPLS core --------> eth1 eth2 eth3 eth4 172.18.0.10 172.18.0.11 172.19.0.11 172.19.0.12 ffff::172.18.0.10 ffff::172.19.0.12 <------------------IPv6 MPLS tunnel ----------------------> R0 and R2 act as 6PE routers of IPv6 islands. R1 is IPv4 only with MPLS tunnels between R0,R1 and R1,R2. docker exec r0 ip -f inet6 route add abcd::2.2.2.2/128 nexthop encap mpls 100 via ::ffff:172.18.0.11 dev eth1 docker exec r2 ip -f inet6 route add abcd::1.1.1.1/128 nexthop encap mpls 200 via ::ffff:172.19.0.11 dev eth4 docker exec r1 ip -f mpls route add 100 via inet 172.19.0.12 dev eth3 docker exec r1 ip -f mpls route add 200 via inet 172.18.0.10 dev eth2 With the change, when R0 sends an IPv6 packet over MPLS tunnel to abcd::2.2.2.2, using ::ffff:172.18.0.11 as the nexthop, it does neighbor discovery for 172.18.18.0.11. Signed-off-by: Vinay K Nallamothu Tested-by: Avinash Lingala Tested-by: Aravind Srinivas Srinivasa Prabhakar Signed-off-by: David S. Miller --- net/mpls/mpls_iptunnel.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index dda8930f20e7..f3a8557494d6 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -140,9 +140,15 @@ static int mpls_xmit(struct sk_buff *skb) if (rt) err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gateway, skb); - else if (rt6) - err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt6->rt6i_gateway, - skb); + else if (rt6) { + if (ipv6_addr_v4mapped(&rt6->rt6i_gateway)) { + /* 6PE (RFC 4798) */ + err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt6->rt6i_gateway.s6_addr32[3], + skb); + } else + err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt6->rt6i_gateway, + skb); + } if (err) net_dbg_ratelimited("%s: packet transmission failed: %d\n", __func__, err); -- cgit From 7ae622c978db6b2e28b4fced6ecd2a174492059d Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 31 Jan 2019 11:04:19 +0200 Subject: usb: dwc3: pci: add support for Comet Lake PCH ID This patch simply adds a new PCI Device ID Signed-off-by: Felipe Balbi --- drivers/usb/dwc3/dwc3-pci.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c index fdc6e4e403e8..8cced3609e24 100644 --- a/drivers/usb/dwc3/dwc3-pci.c +++ b/drivers/usb/dwc3/dwc3-pci.c @@ -29,6 +29,7 @@ #define PCI_DEVICE_ID_INTEL_BXT_M 0x1aaa #define PCI_DEVICE_ID_INTEL_APL 0x5aaa #define PCI_DEVICE_ID_INTEL_KBP 0xa2b0 +#define PCI_DEVICE_ID_INTEL_CMLH 0x02ee #define PCI_DEVICE_ID_INTEL_GLK 0x31aa #define PCI_DEVICE_ID_INTEL_CNPLP 0x9dee #define PCI_DEVICE_ID_INTEL_CNPH 0xa36e @@ -305,6 +306,9 @@ static const struct pci_device_id dwc3_pci_id_table[] = { { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MRFLD), (kernel_ulong_t) &dwc3_pci_mrfld_properties, }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CMLH), + (kernel_ulong_t) &dwc3_pci_intel_properties, }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_SPTLP), (kernel_ulong_t) &dwc3_pci_intel_properties, }, -- cgit From 9d6a54c1430647355a5e23434881b2ca3d192b48 Mon Sep 17 00:00:00 2001 From: Guido Kiener Date: Tue, 19 Mar 2019 19:12:03 +0100 Subject: usb: gadget: net2280: Fix overrun of OUT messages The OUT endpoint normally blocks (NAK) subsequent packets when a short packet was received and returns an incomplete queue entry to the gadget driver. Thereby the gadget driver can detect a short packet when reading queue entries with a length that is not equal to a multiple of packet size. The start_queue() function enables receiving OUT packets regardless of the content of the OUT FIFO. This results in a race: With the current code, it's possible that the "!ep->is_in && (readl(&ep->regs->ep_stat) & BIT(NAK_OUT_PACKETS))" test in start_dma() will fail, then a short packet will be received, and then start_queue() will call stop_out_naking(). That's what we don't want (OUT naking gets turned off while there is data in the FIFO) because then the next driver request might receive a mixture of old and new packets. With the patch, this race can't occur because the FIFO's state is tested after we know that OUT naking is already turned on, and OUT naking is stopped only when both of the conditions are met. This ensures that all received data is delivered to the gadget driver, which can detect a short packet now before new packets are appended to the last short packet. Acked-by: Alan Stern Signed-off-by: Guido Kiener Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/net2280.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c index f63f82450bf4..e0b413e9e532 100644 --- a/drivers/usb/gadget/udc/net2280.c +++ b/drivers/usb/gadget/udc/net2280.c @@ -866,9 +866,6 @@ static void start_queue(struct net2280_ep *ep, u32 dmactl, u32 td_dma) (void) readl(&ep->dev->pci->pcimstctl); writel(BIT(DMA_START), &dma->dmastat); - - if (!ep->is_in) - stop_out_naking(ep); } static void start_dma(struct net2280_ep *ep, struct net2280_request *req) @@ -907,6 +904,7 @@ static void start_dma(struct net2280_ep *ep, struct net2280_request *req) writel(BIT(DMA_START), &dma->dmastat); return; } + stop_out_naking(ep); } tmp = dmactl_default; -- cgit From f1d3fba17cd4eeea20397f1324b7b9c69a6a935c Mon Sep 17 00:00:00 2001 From: Guido Kiener Date: Mon, 18 Mar 2019 09:18:33 +0100 Subject: usb: gadget: net2280: Fix net2280_dequeue() When a request must be dequeued with net2280_dequeue() e.g. due to a device clear action and the same request is finished by the function scan_dma_completions() then the function net2280_dequeue() does not find the request in the following search loop and returns the error -EINVAL without restoring the status ep->stopped. Thus the endpoint keeps blocked and does not receive any data anymore. This fix restores the status and does not issue an error message. Acked-by: Alan Stern Signed-off-by: Guido Kiener Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/net2280.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/udc/net2280.c b/drivers/usb/gadget/udc/net2280.c index e0b413e9e532..898339e5df10 100644 --- a/drivers/usb/gadget/udc/net2280.c +++ b/drivers/usb/gadget/udc/net2280.c @@ -1273,9 +1273,9 @@ static int net2280_dequeue(struct usb_ep *_ep, struct usb_request *_req) break; } if (&req->req != _req) { + ep->stopped = stopped; spin_unlock_irqrestore(&ep->dev->lock, flags); - dev_err(&ep->dev->pdev->dev, "%s: Request mismatch\n", - __func__); + ep_dbg(ep->dev, "%s: Request mismatch\n", __func__); return -EINVAL; } -- cgit From 091dacc3cc10979ab0422f0a9f7fcc27eee97e69 Mon Sep 17 00:00:00 2001 From: Guido Kiener Date: Mon, 18 Mar 2019 09:18:34 +0100 Subject: usb: gadget: net2272: Fix net2272_dequeue() Restore the status of ep->stopped in function net2272_dequeue(). When the given request is not found in the endpoint queue the function returns -EINVAL without restoring the state of ep->stopped. Thus the endpoint keeps blocked and does not transfer any data anymore. This fix is only compile-tested, since we do not have a corresponding hardware. An analogous fix was tested in the sibling driver. See "usb: gadget: net2280: Fix net2280_dequeue()" Acked-by: Alan Stern Signed-off-by: Guido Kiener Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/net2272.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/gadget/udc/net2272.c b/drivers/usb/gadget/udc/net2272.c index b77f3126580e..c2011cd7df8c 100644 --- a/drivers/usb/gadget/udc/net2272.c +++ b/drivers/usb/gadget/udc/net2272.c @@ -945,6 +945,7 @@ net2272_dequeue(struct usb_ep *_ep, struct usb_request *_req) break; } if (&req->req != _req) { + ep->stopped = stopped; spin_unlock_irqrestore(&ep->dev->lock, flags); return -EINVAL; } -- cgit From 4b9ce3a651a37c60527101db4451a315a8b9588f Mon Sep 17 00:00:00 2001 From: Deepak Rawat Date: Thu, 28 Feb 2019 10:29:54 -0800 Subject: drm/vmwgfx: Return 0 when gmrid::get_node runs out of ID's If it's not a system error and get_node implementation accommodate the buffer object then it should return 0 with memm::mm_node set to NULL. v2: Test for id != -ENOMEM instead of id == -ENOSPC. Cc: Fixes: 4eb085e42fde ("drm/vmwgfx: Convert to new IDA API") Signed-off-by: Deepak Rawat Reviewed-by: Thomas Hellstrom Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c index b93c558dd86e..7da752ca1c34 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmrid_manager.c @@ -57,7 +57,7 @@ static int vmw_gmrid_man_get_node(struct ttm_mem_type_manager *man, id = ida_alloc_max(&gman->gmr_ida, gman->max_gmr_ids - 1, GFP_KERNEL); if (id < 0) - return id; + return (id != -ENOMEM ? 0 : id); spin_lock(&gman->lock); -- cgit From c2d311553855395764e2e5bf401d987ba65c2056 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 18 Mar 2019 15:47:58 +0100 Subject: drm/vmwgfx: Don't double-free the mode stored in par->set_mode When calling vmw_fb_set_par(), the mode stored in par->set_mode gets free'd twice. The first free is in vmw_fb_kms_detach(), the second is near the end of vmw_fb_set_par() under the name of 'old_mode'. The mode-setting code only works correctly if the mode doesn't actually change. Removing 'old_mode' in favor of using par->set_mode directly fixes the problem. Cc: Fixes: a278724aa23c ("drm/vmwgfx: Implement fbdev on kms v2") Signed-off-by: Thomas Zimmermann Reviewed-by: Deepak Rawat Signed-off-by: Thomas Hellstrom --- drivers/gpu/drm/vmwgfx/vmwgfx_fb.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c index b913a56f3426..2a9112515f46 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c @@ -564,11 +564,9 @@ static int vmw_fb_set_par(struct fb_info *info) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_PVSYNC) }; - struct drm_display_mode *old_mode; struct drm_display_mode *mode; int ret; - old_mode = par->set_mode; mode = drm_mode_duplicate(vmw_priv->dev, &new_mode); if (!mode) { DRM_ERROR("Could not create new fb mode.\n"); @@ -579,11 +577,7 @@ static int vmw_fb_set_par(struct fb_info *info) mode->vdisplay = var->yres; vmw_guess_mode_timing(mode); - if (old_mode && drm_mode_equal(old_mode, mode)) { - drm_mode_destroy(vmw_priv->dev, mode); - mode = old_mode; - old_mode = NULL; - } else if (!vmw_kms_validate_mode_vram(vmw_priv, + if (!vmw_kms_validate_mode_vram(vmw_priv, mode->hdisplay * DIV_ROUND_UP(var->bits_per_pixel, 8), mode->vdisplay)) { @@ -620,8 +614,8 @@ static int vmw_fb_set_par(struct fb_info *info) schedule_delayed_work(&par->local_work, 0); out_unlock: - if (old_mode) - drm_mode_destroy(vmw_priv->dev, old_mode); + if (par->set_mode) + drm_mode_destroy(vmw_priv->dev, par->set_mode); par->set_mode = mode; mutex_unlock(&par->bo_mutex); -- cgit From b25a31bf0ca091aa8bdb9ab329b0226257568bbe Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Tue, 19 Mar 2019 13:22:41 +0900 Subject: netfilter: nf_tables: add missing ->release_ops() in error path of newrule() ->release_ops() callback releases resources and this is used in error path. If nf_tables_newrule() fails after ->select_ops(), it should release resources. but it can not call ->destroy() because that should be called after ->init(). At this point, ->release_ops() should be used for releasing resources. Test commands: modprobe -rv xt_tcpudp iptables-nft -I INPUT -m tcp <-- error command lsmod Result: Module Size Used by xt_tcpudp 20480 2 <-- it should be 0 Fixes: b8e204006340 ("netfilter: nft_compat: use .release_ops and remove list of extension") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 513f93118604..ef7772e976cc 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2806,8 +2806,11 @@ err2: nf_tables_rule_release(&ctx, rule); err1: for (i = 0; i < n; i++) { - if (info[i].ops != NULL) + if (info[i].ops) { module_put(info[i].ops->type->owner); + if (info[i].ops->type->release_ops) + info[i].ops->type->release_ops(info[i].ops); + } } kvfree(info); return err; -- cgit From 072684e8c58d17e853f8e8b9f6d9ce2e58d2b036 Mon Sep 17 00:00:00 2001 From: Radoslav Gerganov Date: Tue, 5 Mar 2019 10:10:34 +0000 Subject: USB: gadget: f_hid: fix deadlock in f_hidg_write() In f_hidg_write() the write_spinlock is acquired before calling usb_ep_queue() which causes a deadlock when dummy_hcd is being used. This is because dummy_queue() callbacks into f_hidg_req_complete() which tries to acquire the same spinlock. This is (part of) the backtrace when the deadlock occurs: 0xffffffffc06b1410 in f_hidg_req_complete 0xffffffffc06a590a in usb_gadget_giveback_request 0xffffffffc06cfff2 in dummy_queue 0xffffffffc06a4b96 in usb_ep_queue 0xffffffffc06b1eb6 in f_hidg_write 0xffffffff8127730b in __vfs_write 0xffffffff812774d1 in vfs_write 0xffffffff81277725 in SYSC_write Fix this by releasing the write_spinlock before calling usb_ep_queue() Reviewed-by: James Bottomley Tested-by: James Bottomley Cc: stable@vger.kernel.org # 4.11+ Fixes: 749494b6bdbb ("usb: gadget: f_hid: fix: Move IN request allocation to set_alt()") Signed-off-by: Radoslav Gerganov Signed-off-by: Felipe Balbi --- drivers/usb/gadget/function/f_hid.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c index 75b113a5b25c..f3816a5c861e 100644 --- a/drivers/usb/gadget/function/f_hid.c +++ b/drivers/usb/gadget/function/f_hid.c @@ -391,20 +391,20 @@ try_again: req->complete = f_hidg_req_complete; req->context = hidg; + spin_unlock_irqrestore(&hidg->write_spinlock, flags); + status = usb_ep_queue(hidg->in_ep, req, GFP_ATOMIC); if (status < 0) { ERROR(hidg->func.config->cdev, "usb_ep_queue error on int endpoint %zd\n", status); - goto release_write_pending_unlocked; + goto release_write_pending; } else { status = count; } - spin_unlock_irqrestore(&hidg->write_spinlock, flags); return status; release_write_pending: spin_lock_irqsave(&hidg->write_spinlock, flags); -release_write_pending_unlocked: hidg->write_pending = 0; spin_unlock_irqrestore(&hidg->write_spinlock, flags); -- cgit From 032f85c9360fb1a08385c584c2c4ed114b33c260 Mon Sep 17 00:00:00 2001 From: Marco Felsch Date: Mon, 4 Mar 2019 11:49:40 +0100 Subject: ARM: dts: pfla02: increase phy reset duration Increase the reset duration to ensure correct phy functionality. The reset duration is taken from barebox commit 52fdd510de ("ARM: dts: pfla02: use long enough reset for ethernet phy"): Use a longer reset time for ethernet phy Micrel KSZ9031RNX. Otherwise a small percentage of modules have 'transmission timeouts' errors like barebox@Phytec phyFLEX-i.MX6 Quad Carrier-Board:/ ifup eth0 warning: No MAC address set. Using random address 7e:94:4d:02:f8:f3 eth0: 1000Mbps full duplex link detected eth0: transmission timeout T eth0: transmission timeout T eth0: transmission timeout T eth0: transmission timeout T eth0: transmission timeout Cc: Stefan Christ Cc: Christian Hemp Signed-off-by: Marco Felsch Fixes: 3180f956668e ("ARM: dts: Phytec imx6q pfla02 and pbab01 support") Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi index 433bf09a1954..027df06c5dc7 100644 --- a/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi +++ b/arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi @@ -91,6 +91,7 @@ pinctrl-0 = <&pinctrl_enet>; phy-handle = <ðphy>; phy-mode = "rgmii"; + phy-reset-duration = <10>; /* in msecs */ phy-reset-gpios = <&gpio3 23 GPIO_ACTIVE_LOW>; phy-supply = <&vdd_eth_io_reg>; status = "disabled"; -- cgit From 2908b076f5198d231de62713cb2b633a3a4b95ac Mon Sep 17 00:00:00 2001 From: Lin Yi Date: Wed, 20 Mar 2019 19:04:56 +0800 Subject: USB: serial: mos7720: fix mos_parport refcount imbalance on error path The write_parport_reg_nonblock() helper takes a reference to the struct mos_parport, but failed to release it in a couple of error paths after allocation failures, leading to a memory leak. Johan said that move the kref_get() and mos_parport assignment to the end of urbtrack initialisation is a better way, so move it. and mos_parport do not used until urbtrack initialisation. Signed-off-by: Lin Yi Fixes: b69578df7e98 ("USB: usbserial: mos7720: add support for parallel port on moschip 7715") Cc: stable # 2.6.35 Signed-off-by: Johan Hovold --- drivers/usb/serial/mos7720.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/usb/serial/mos7720.c b/drivers/usb/serial/mos7720.c index fc52ac75fbf6..18110225d506 100644 --- a/drivers/usb/serial/mos7720.c +++ b/drivers/usb/serial/mos7720.c @@ -366,8 +366,6 @@ static int write_parport_reg_nonblock(struct mos7715_parport *mos_parport, if (!urbtrack) return -ENOMEM; - kref_get(&mos_parport->ref_count); - urbtrack->mos_parport = mos_parport; urbtrack->urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urbtrack->urb) { kfree(urbtrack); @@ -388,6 +386,8 @@ static int write_parport_reg_nonblock(struct mos7715_parport *mos_parport, usb_sndctrlpipe(usbdev, 0), (unsigned char *)urbtrack->setup, NULL, 0, async_complete, urbtrack); + kref_get(&mos_parport->ref_count); + urbtrack->mos_parport = mos_parport; kref_init(&urbtrack->ref_count); INIT_LIST_HEAD(&urbtrack->urblist_entry); -- cgit From 8bc086899816214fbc6047c9c7e15fcab49552bf Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 17 Mar 2019 01:17:56 +0000 Subject: powerpc/mm: Only define MAX_PHYSMEM_BITS in SPARSEMEM configurations MAX_PHYSMEM_BITS only needs to be defined if CONFIG_SPARSEMEM is enabled, and that was the case before commit 4ffe713b7587 ("powerpc/mm: Increase the max addressable memory to 2PB"). On 32-bit systems, where CONFIG_SPARSEMEM is not enabled, we now define it as 46. That is larger than the real number of physical address bits, and breaks calculations in zsmalloc: mm/zsmalloc.c:130:49: warning: right shift count is negative MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS)) ^~ ... mm/zsmalloc.c:253:21: error: variably modified 'size_class' at file scope struct size_class *size_class[ZS_SIZE_CLASSES]; ^~~~~~~~~~ Fixes: 4ffe713b7587 ("powerpc/mm: Increase the max addressable memory to 2PB") Cc: stable@vger.kernel.org # v4.20+ Signed-off-by: Ben Hutchings Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index d34ad1657d7b..598cdcdd1355 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -352,7 +352,7 @@ static inline bool strict_kernel_rwx_enabled(void) #if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \ defined (CONFIG_PPC_64K_PAGES) #define MAX_PHYSMEM_BITS 51 -#else +#elif defined(CONFIG_SPARSEMEM) #define MAX_PHYSMEM_BITS 46 #endif -- cgit From a75bb4eb9e565b9f5115e2e8c07377ce32cbe69a Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Mon, 18 Mar 2019 17:10:05 -0400 Subject: Revert "kbuild: use -Oz instead of -Os when using clang" The clang option -Oz enables *aggressive* optimization for size, which doesn't necessarily result in smaller images, but can have negative impact on performance. Switch back to the less aggressive -Os. This reverts commit 6748cb3c299de1ffbe56733647b01dbcc398c419. Suggested-by: Peter Zijlstra Signed-off-by: Matthias Kaehlcke Reviewed-by: Nick Desaulniers Signed-off-by: Masahiro Yamada --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 99c0530489ef..8c6acfdfe660 100644 --- a/Makefile +++ b/Makefile @@ -677,7 +677,7 @@ KBUILD_CFLAGS += $(call cc-disable-warning, format-overflow) KBUILD_CFLAGS += $(call cc-disable-warning, int-in-bool-context) ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE -KBUILD_CFLAGS += $(call cc-option,-Oz,-Os) +KBUILD_CFLAGS += -Os else KBUILD_CFLAGS += -O2 endif -- cgit From bb229bbb3bf63d23128e851a1f3b85c083178fa1 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 20 Mar 2019 09:46:58 +0100 Subject: libceph: wait for latest osdmap in ceph_monc_blacklist_add() Because map updates are distributed lazily, an OSD may not know about the new blacklist for quite some time after "osd blacklist add" command is completed. This makes it possible for a blacklisted but still alive client to overwrite a post-blacklist update, resulting in data corruption. Waiting for latest osdmap in ceph_monc_blacklist_add() and thus using the post-blacklist epoch for all post-blacklist requests ensures that all such requests "wait" for the blacklist to come into force on their respective OSDs. Cc: stable@vger.kernel.org Fixes: 6305a3b41515 ("libceph: support for blacklisting clients") Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- include/linux/ceph/libceph.h | 2 ++ net/ceph/ceph_common.c | 18 +++++++++++++++++- net/ceph/mon_client.c | 9 +++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index a420c07904bc..337d5049ff93 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -294,6 +294,8 @@ extern void ceph_destroy_client(struct ceph_client *client); extern int __ceph_open_session(struct ceph_client *client, unsigned long started); extern int ceph_open_session(struct ceph_client *client); +int ceph_wait_for_latest_osdmap(struct ceph_client *client, + unsigned long timeout); /* pagevec.c */ extern void ceph_release_page_vector(struct page **pages, int num_pages); diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 9cab80207ced..79eac465ec65 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -738,7 +738,6 @@ int __ceph_open_session(struct ceph_client *client, unsigned long started) } EXPORT_SYMBOL(__ceph_open_session); - int ceph_open_session(struct ceph_client *client) { int ret; @@ -754,6 +753,23 @@ int ceph_open_session(struct ceph_client *client) } EXPORT_SYMBOL(ceph_open_session); +int ceph_wait_for_latest_osdmap(struct ceph_client *client, + unsigned long timeout) +{ + u64 newest_epoch; + int ret; + + ret = ceph_monc_get_version(&client->monc, "osdmap", &newest_epoch); + if (ret) + return ret; + + if (client->osdc.osdmap->epoch >= newest_epoch) + return 0; + + ceph_osdc_maybe_request_map(&client->osdc); + return ceph_monc_wait_osdmap(&client->monc, newest_epoch, timeout); +} +EXPORT_SYMBOL(ceph_wait_for_latest_osdmap); static int __init init_ceph_lib(void) { diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 18deb3d889c4..a53e4fbb6319 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -922,6 +922,15 @@ int ceph_monc_blacklist_add(struct ceph_mon_client *monc, mutex_unlock(&monc->mutex); ret = wait_generic_request(req); + if (!ret) + /* + * Make sure we have the osdmap that includes the blacklist + * entry. This is needed to ensure that the OSDs pick up the + * new blacklist before processing any future requests from + * this client. + */ + ret = ceph_wait_for_latest_osdmap(monc->client, 0); + out: put_generic_request(req); return ret; -- cgit From 9d4a227f6ef189cf37eb22641f6ee788b7dc41bb Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 20 Mar 2019 10:58:05 +0100 Subject: rbd: drop wait_for_latest_osdmap() Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- drivers/block/rbd.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c index 3b2c9289dccb..2210c1b9491b 100644 --- a/drivers/block/rbd.c +++ b/drivers/block/rbd.c @@ -924,23 +924,6 @@ static void rbd_put_client(struct rbd_client *rbdc) kref_put(&rbdc->kref, rbd_client_release); } -static int wait_for_latest_osdmap(struct ceph_client *client) -{ - u64 newest_epoch; - int ret; - - ret = ceph_monc_get_version(&client->monc, "osdmap", &newest_epoch); - if (ret) - return ret; - - if (client->osdc.osdmap->epoch >= newest_epoch) - return 0; - - ceph_osdc_maybe_request_map(&client->osdc); - return ceph_monc_wait_osdmap(&client->monc, newest_epoch, - client->options->mount_timeout); -} - /* * Get a ceph client with specific addr and configuration, if one does * not exist create it. Either way, ceph_opts is consumed by this @@ -960,7 +943,8 @@ static struct rbd_client *rbd_get_client(struct ceph_options *ceph_opts) * Using an existing client. Make sure ->pg_pools is up to * date before we look up the pool id in do_rbd_add(). */ - ret = wait_for_latest_osdmap(rbdc->client); + ret = ceph_wait_for_latest_osdmap(rbdc->client, + rbdc->client->options->mount_timeout); if (ret) { rbd_warn(NULL, "failed to get latest osdmap: %d", ret); rbd_put_client(rbdc); -- cgit From 5cd1c56c42beb6d228cc8d4373fdc5f5ec78a5ad Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Fri, 15 Mar 2019 12:56:49 +0200 Subject: i2c: i801: Add support for Intel Comet Lake Add PCI ID for Intel Comet Lake PCH. Signed-off-by: Jarkko Nikula Reviewed-by: Jean Delvare Signed-off-by: Wolfram Sang --- Documentation/i2c/busses/i2c-i801 | 1 + drivers/i2c/busses/Kconfig | 1 + drivers/i2c/busses/i2c-i801.c | 4 ++++ 3 files changed, 6 insertions(+) diff --git a/Documentation/i2c/busses/i2c-i801 b/Documentation/i2c/busses/i2c-i801 index d1ee484a787d..ee9984f35868 100644 --- a/Documentation/i2c/busses/i2c-i801 +++ b/Documentation/i2c/busses/i2c-i801 @@ -36,6 +36,7 @@ Supported adapters: * Intel Cannon Lake (PCH) * Intel Cedar Fork (PCH) * Intel Ice Lake (PCH) + * Intel Comet Lake (PCH) Datasheets: Publicly available at the Intel website On Intel Patsburg and later chipsets, both the normal host SMBus controller diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index f2c681971201..f8979abb9a19 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -131,6 +131,7 @@ config I2C_I801 Cannon Lake (PCH) Cedar Fork (PCH) Ice Lake (PCH) + Comet Lake (PCH) This driver can also be built as a module. If so, the module will be called i2c-i801. diff --git a/drivers/i2c/busses/i2c-i801.c b/drivers/i2c/busses/i2c-i801.c index c91e145ef5a5..679c6c41f64b 100644 --- a/drivers/i2c/busses/i2c-i801.c +++ b/drivers/i2c/busses/i2c-i801.c @@ -71,6 +71,7 @@ * Cannon Lake-LP (PCH) 0x9da3 32 hard yes yes yes * Cedar Fork (PCH) 0x18df 32 hard yes yes yes * Ice Lake-LP (PCH) 0x34a3 32 hard yes yes yes + * Comet Lake (PCH) 0x02a3 32 hard yes yes yes * * Features supported by this driver: * Software PEC no @@ -240,6 +241,7 @@ #define PCI_DEVICE_ID_INTEL_LEWISBURG_SSKU_SMBUS 0xa223 #define PCI_DEVICE_ID_INTEL_KABYLAKE_PCH_H_SMBUS 0xa2a3 #define PCI_DEVICE_ID_INTEL_CANNONLAKE_H_SMBUS 0xa323 +#define PCI_DEVICE_ID_INTEL_COMETLAKE_SMBUS 0x02a3 struct i801_mux_config { char *gpio_chip; @@ -1038,6 +1040,7 @@ static const struct pci_device_id i801_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CANNONLAKE_H_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CANNONLAKE_LP_SMBUS) }, { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICELAKE_LP_SMBUS) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_COMETLAKE_SMBUS) }, { 0, } }; @@ -1534,6 +1537,7 @@ static int i801_probe(struct pci_dev *dev, const struct pci_device_id *id) case PCI_DEVICE_ID_INTEL_DNV_SMBUS: case PCI_DEVICE_ID_INTEL_KABYLAKE_PCH_H_SMBUS: case PCI_DEVICE_ID_INTEL_ICELAKE_LP_SMBUS: + case PCI_DEVICE_ID_INTEL_COMETLAKE_SMBUS: priv->features |= FEATURE_I2C_BLOCK_READ; priv->features |= FEATURE_IRQ; priv->features |= FEATURE_SMBUS_PEC; -- cgit From 3c3736cd32bf5197aed1410ae826d2d254a5b277 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 20 Mar 2019 14:57:19 +0000 Subject: KVM: arm/arm64: Fix handling of stage2 huge mappings We rely on the mmu_notifier call backs to handle the split/merge of huge pages and thus we are guaranteed that, while creating a block mapping, either the entire block is unmapped at stage2 or it is missing permission. However, we miss a case where the block mapping is split for dirty logging case and then could later be made block mapping, if we cancel the dirty logging. This not only creates inconsistent TLB entries for the pages in the the block, but also leakes the table pages for PMD level. Handle this corner case for the huge mappings at stage2 by unmapping the non-huge mapping for the block. This could potentially release the upper level table. So we need to restart the table walk once we unmap the range. Fixes : ad361f093c1e31d ("KVM: ARM: Support hugetlbfs backed huge pages") Reported-by: Zheng Xiang Cc: Zheng Xiang Cc: Zenghui Yu Cc: Christoffer Dall Signed-off-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm/include/asm/stage2_pgtable.h | 2 ++ virt/kvm/arm/mmu.c | 59 +++++++++++++++++++++++++---------- 2 files changed, 45 insertions(+), 16 deletions(-) diff --git a/arch/arm/include/asm/stage2_pgtable.h b/arch/arm/include/asm/stage2_pgtable.h index de2089501b8b..9e11dce55e06 100644 --- a/arch/arm/include/asm/stage2_pgtable.h +++ b/arch/arm/include/asm/stage2_pgtable.h @@ -75,6 +75,8 @@ static inline bool kvm_stage2_has_pud(struct kvm *kvm) #define S2_PMD_MASK PMD_MASK #define S2_PMD_SIZE PMD_SIZE +#define S2_PUD_MASK PUD_MASK +#define S2_PUD_SIZE PUD_SIZE static inline bool kvm_stage2_has_pmd(struct kvm *kvm) { diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index bcdf978c0d1d..f9da2fad9bd6 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -1067,25 +1067,43 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache { pmd_t *pmd, old_pmd; +retry: pmd = stage2_get_pmd(kvm, cache, addr); VM_BUG_ON(!pmd); old_pmd = *pmd; + /* + * Multiple vcpus faulting on the same PMD entry, can + * lead to them sequentially updating the PMD with the + * same value. Following the break-before-make + * (pmd_clear() followed by tlb_flush()) process can + * hinder forward progress due to refaults generated + * on missing translations. + * + * Skip updating the page table if the entry is + * unchanged. + */ + if (pmd_val(old_pmd) == pmd_val(*new_pmd)) + return 0; + if (pmd_present(old_pmd)) { /* - * Multiple vcpus faulting on the same PMD entry, can - * lead to them sequentially updating the PMD with the - * same value. Following the break-before-make - * (pmd_clear() followed by tlb_flush()) process can - * hinder forward progress due to refaults generated - * on missing translations. + * If we already have PTE level mapping for this block, + * we must unmap it to avoid inconsistent TLB state and + * leaking the table page. We could end up in this situation + * if the memory slot was marked for dirty logging and was + * reverted, leaving PTE level mappings for the pages accessed + * during the period. So, unmap the PTE level mapping for this + * block and retry, as we could have released the upper level + * table in the process. * - * Skip updating the page table if the entry is - * unchanged. + * Normal THP split/merge follows mmu_notifier callbacks and do + * get handled accordingly. */ - if (pmd_val(old_pmd) == pmd_val(*new_pmd)) - return 0; - + if (!pmd_thp_or_huge(old_pmd)) { + unmap_stage2_range(kvm, addr & S2_PMD_MASK, S2_PMD_SIZE); + goto retry; + } /* * Mapping in huge pages should only happen through a * fault. If a page is merged into a transparent huge @@ -1097,8 +1115,7 @@ static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache * should become splitting first, unmapped, merged, * and mapped back in on-demand. */ - VM_BUG_ON(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd)); - + WARN_ON_ONCE(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd)); pmd_clear(pmd); kvm_tlb_flush_vmid_ipa(kvm, addr); } else { @@ -1114,6 +1131,7 @@ static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cac { pud_t *pudp, old_pud; +retry: pudp = stage2_get_pud(kvm, cache, addr); VM_BUG_ON(!pudp); @@ -1121,14 +1139,23 @@ static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cac /* * A large number of vcpus faulting on the same stage 2 entry, - * can lead to a refault due to the - * stage2_pud_clear()/tlb_flush(). Skip updating the page - * tables if there is no change. + * can lead to a refault due to the stage2_pud_clear()/tlb_flush(). + * Skip updating the page tables if there is no change. */ if (pud_val(old_pud) == pud_val(*new_pudp)) return 0; if (stage2_pud_present(kvm, old_pud)) { + /* + * If we already have table level mapping for this block, unmap + * the range for this block and retry. + */ + if (!stage2_pud_huge(kvm, old_pud)) { + unmap_stage2_range(kvm, addr & S2_PUD_MASK, S2_PUD_SIZE); + goto retry; + } + + WARN_ON_ONCE(kvm_pud_pfn(old_pud) != kvm_pud_pfn(*new_pudp)); stage2_pud_clear(kvm, pudp); kvm_tlb_flush_vmid_ipa(kvm, addr); } else { -- cgit From d9ea27a3304812500de3674981a9c3a2086d517b Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 20 Mar 2019 22:18:13 +0800 Subject: KVM: arm/arm64: vgic-its: Make attribute accessors static Fix sparse warnings: arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-its.c:1732:5: warning: symbol 'vgic_its_has_attr_regs' was not declared. Should it be static? arch/arm64/kvm/../../../virt/kvm/arm/vgic/vgic-its.c:1753:5: warning: symbol 'vgic_its_attr_regs_access' was not declared. Should it be static? Signed-off-by: YueHaibing [maz: fixed subject] Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-its.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index fcb2fceaa4a5..44ceaccb18cf 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -1736,8 +1736,8 @@ static void vgic_its_destroy(struct kvm_device *kvm_dev) kfree(its); } -int vgic_its_has_attr_regs(struct kvm_device *dev, - struct kvm_device_attr *attr) +static int vgic_its_has_attr_regs(struct kvm_device *dev, + struct kvm_device_attr *attr) { const struct vgic_register_region *region; gpa_t offset = attr->attr; @@ -1757,9 +1757,9 @@ int vgic_its_has_attr_regs(struct kvm_device *dev, return 0; } -int vgic_its_attr_regs_access(struct kvm_device *dev, - struct kvm_device_attr *attr, - u64 *reg, bool is_write) +static int vgic_its_attr_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + u64 *reg, bool is_write) { const struct vgic_register_region *region; struct vgic_its *its; -- cgit From e5a5af7718610c819c4d368bb62655ee43a38011 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Wed, 20 Mar 2019 10:20:56 -0700 Subject: arm64: remove obsolete selection of MULTI_IRQ_HANDLER The arm64 config selects MULTI_IRQ_HANDLER, which was renamed to GENERIC_IRQ_MULTI_HANDLER by commit 4c301f9b6a94 ("ARM: Convert to GENERIC_IRQ_MULTI_HANDLER"). The 'new' option is already selected, so just remove the obsolete entry. Signed-off-by: Matthias Kaehlcke Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 117b2541ef3d..7e34b9eba5de 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -159,7 +159,6 @@ config ARM64 select IRQ_DOMAIN select IRQ_FORCED_THREADING select MODULES_USE_ELF_RELA - select MULTI_IRQ_HANDLER select NEED_DMA_MAP_STATE select NEED_SG_DMA_LENGTH select OF -- cgit From 398f0132c14754fcd03c1c4f8e7176d001ce8ea1 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Mon, 18 Mar 2019 23:14:52 -0700 Subject: net/packet: Set __GFP_NOWARN upon allocation in alloc_pg_vec Since commit fc62814d690c ("net/packet: fix 4gb buffer limit due to overflow check") one can now allocate packet ring buffers >= UINT_MAX. However, syzkaller found that that triggers a warning: [ 21.100000] WARNING: CPU: 2 PID: 2075 at mm/page_alloc.c:4584 __alloc_pages_nod0 [ 21.101490] Modules linked in: [ 21.101921] CPU: 2 PID: 2075 Comm: syz-executor.0 Not tainted 5.0.0 #146 [ 21.102784] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.5.1 01/01/2011 [ 21.103887] RIP: 0010:__alloc_pages_nodemask+0x2a0/0x630 [ 21.104640] Code: fe ff ff 65 48 8b 04 25 c0 de 01 00 48 05 90 0f 00 00 41 bd 01 00 00 00 48 89 44 24 48 e9 9c fe 3 [ 21.107121] RSP: 0018:ffff88805e1cf920 EFLAGS: 00010246 [ 21.107819] RAX: 0000000000000000 RBX: ffffffff85a488a0 RCX: 0000000000000000 [ 21.108753] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: 0000000000000000 [ 21.109699] RBP: 1ffff1100bc39f28 R08: ffffed100bcefb67 R09: ffffed100bcefb67 [ 21.110646] R10: 0000000000000001 R11: ffffed100bcefb66 R12: 000000000000000d [ 21.111623] R13: 0000000000000000 R14: ffff88805e77d888 R15: 000000000000000d [ 21.112552] FS: 00007f7c7de05700(0000) GS:ffff88806d100000(0000) knlGS:0000000000000000 [ 21.113612] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 21.114405] CR2: 000000000065c000 CR3: 000000005e58e006 CR4: 00000000001606e0 [ 21.115367] Call Trace: [ 21.115705] ? __alloc_pages_slowpath+0x21c0/0x21c0 [ 21.116362] alloc_pages_current+0xac/0x1e0 [ 21.116923] kmalloc_order+0x18/0x70 [ 21.117393] kmalloc_order_trace+0x18/0x110 [ 21.117949] packet_set_ring+0x9d5/0x1770 [ 21.118524] ? packet_rcv_spkt+0x440/0x440 [ 21.119094] ? lock_downgrade+0x620/0x620 [ 21.119646] ? __might_fault+0x177/0x1b0 [ 21.120177] packet_setsockopt+0x981/0x2940 [ 21.120753] ? __fget+0x2fb/0x4b0 [ 21.121209] ? packet_release+0xab0/0xab0 [ 21.121740] ? sock_has_perm+0x1cd/0x260 [ 21.122297] ? selinux_secmark_relabel_packet+0xd0/0xd0 [ 21.123013] ? __fget+0x324/0x4b0 [ 21.123451] ? selinux_netlbl_socket_setsockopt+0x101/0x320 [ 21.124186] ? selinux_netlbl_sock_rcv_skb+0x3a0/0x3a0 [ 21.124908] ? __lock_acquire+0x529/0x3200 [ 21.125453] ? selinux_socket_setsockopt+0x5d/0x70 [ 21.126075] ? __sys_setsockopt+0x131/0x210 [ 21.126533] ? packet_release+0xab0/0xab0 [ 21.127004] __sys_setsockopt+0x131/0x210 [ 21.127449] ? kernel_accept+0x2f0/0x2f0 [ 21.127911] ? ret_from_fork+0x8/0x50 [ 21.128313] ? do_raw_spin_lock+0x11b/0x280 [ 21.128800] __x64_sys_setsockopt+0xba/0x150 [ 21.129271] ? lockdep_hardirqs_on+0x37f/0x560 [ 21.129769] do_syscall_64+0x9f/0x450 [ 21.130182] entry_SYSCALL_64_after_hwframe+0x49/0xbe We should allocate with __GFP_NOWARN to handle this. Cc: Kal Conley Cc: Andrey Konovalov Fixes: fc62814d690c ("net/packet: fix 4gb buffer limit due to overflow check") Signed-off-by: Christoph Paasch Signed-off-by: David S. Miller --- net/packet/af_packet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 323655a25674..9419c5cf4de5 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4210,7 +4210,7 @@ static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order) struct pgv *pg_vec; int i; - pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL); + pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN); if (unlikely(!pg_vec)) goto out; -- cgit From 1c87e79a002f6a159396138cd3f3ab554a2a8887 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Mar 2019 14:45:48 +0800 Subject: ipv6: make ip6_create_rt_rcu return ip6_null_entry instead of NULL Jianlin reported a crash: [ 381.484332] BUG: unable to handle kernel NULL pointer dereference at 0000000000000068 [ 381.619802] RIP: 0010:fib6_rule_lookup+0xa3/0x160 [ 382.009615] Call Trace: [ 382.020762] [ 382.030174] ip6_route_redirect.isra.52+0xc9/0xf0 [ 382.050984] ip6_redirect+0xb6/0xf0 [ 382.066731] icmpv6_notify+0xca/0x190 [ 382.083185] ndisc_redirect_rcv+0x10f/0x160 [ 382.102569] ndisc_rcv+0xfb/0x100 [ 382.117725] icmpv6_rcv+0x3f2/0x520 [ 382.133637] ip6_input_finish+0xbf/0x460 [ 382.151634] ip6_input+0x3b/0xb0 [ 382.166097] ipv6_rcv+0x378/0x4e0 It was caused by the lookup function __ip6_route_redirect() returns NULL in fib6_rule_lookup() when ip6_create_rt_rcu() returns NULL. So we fix it by simply making ip6_create_rt_rcu() return ip6_null_entry instead of NULL. v1->v2: - move down 'fallback:' to make it more readable. Fixes: e873e4b9cc7e ("ipv6: use fib6_info_hold_safe() when necessary") Reported-by: Jianlin Shi Suggested-by: Paolo Abeni Signed-off-by: Xin Long Reviewed-by: David Ahern Acked-by: Wei Wang Signed-off-by: David S. Miller --- net/ipv6/route.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4ef4bbdb49d4..0302e0eb07af 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1040,14 +1040,20 @@ static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt) struct rt6_info *nrt; if (!fib6_info_hold_safe(rt)) - return NULL; + goto fallback; nrt = ip6_dst_alloc(dev_net(dev), dev, flags); - if (nrt) - ip6_rt_copy_init(nrt, rt); - else + if (!nrt) { fib6_info_release(rt); + goto fallback; + } + ip6_rt_copy_init(nrt, rt); + return nrt; + +fallback: + nrt = dev_net(dev)->ipv6.ip6_null_entry; + dst_hold(&nrt->dst); return nrt; } @@ -1096,10 +1102,6 @@ restart: dst_hold(&rt->dst); } else { rt = ip6_create_rt_rcu(f6i); - if (!rt) { - rt = net->ipv6.ip6_null_entry; - dst_hold(&rt->dst); - } } rcu_read_unlock(); -- cgit From ef82bcfa671b9a635bab5fa669005663d8b177c5 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 20 Mar 2019 14:49:38 +0800 Subject: sctp: use memdup_user instead of vmemdup_user In sctp_setsockopt_bindx()/__sctp_setsockopt_connectx(), it allocates memory with addrs_size which is passed from userspace. We used flag GFP_USER to put some more restrictions on it in Commit cacc06215271 ("sctp: use GFP_USER for user-controlled kmalloc"). However, since Commit c981f254cc82 ("sctp: use vmemdup_user() rather than badly open-coding memdup_user()"), vmemdup_user() has been used, which doesn't check GFP_USER flag when goes to vmalloc_*(). So when addrs_size is a huge value, it could exhaust memory and even trigger oom killer. This patch is to use memdup_user() instead, in which GFP_USER would work to limit the memory allocation with a huge addrs_size. Note we can't fix it by limiting 'addrs_size', as there's no demand for it from RFC. Reported-by: syzbot+ec1b7575afef85a0e5ca@syzkaller.appspotmail.com Fixes: c981f254cc82 ("sctp: use vmemdup_user() rather than badly open-coding memdup_user()") Signed-off-by: Xin Long Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/socket.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 011c349d877a..9874e60c9b0d 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -999,7 +999,7 @@ static int sctp_setsockopt_bindx(struct sock *sk, if (unlikely(addrs_size <= 0)) return -EINVAL; - kaddrs = vmemdup_user(addrs, addrs_size); + kaddrs = memdup_user(addrs, addrs_size); if (unlikely(IS_ERR(kaddrs))) return PTR_ERR(kaddrs); @@ -1007,7 +1007,7 @@ static int sctp_setsockopt_bindx(struct sock *sk, addr_buf = kaddrs; while (walk_size < addrs_size) { if (walk_size + sizeof(sa_family_t) > addrs_size) { - kvfree(kaddrs); + kfree(kaddrs); return -EINVAL; } @@ -1018,7 +1018,7 @@ static int sctp_setsockopt_bindx(struct sock *sk, * causes the address buffer to overflow return EINVAL. */ if (!af || (walk_size + af->sockaddr_len) > addrs_size) { - kvfree(kaddrs); + kfree(kaddrs); return -EINVAL; } addrcnt++; @@ -1054,7 +1054,7 @@ static int sctp_setsockopt_bindx(struct sock *sk, } out: - kvfree(kaddrs); + kfree(kaddrs); return err; } @@ -1329,7 +1329,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk, if (unlikely(addrs_size <= 0)) return -EINVAL; - kaddrs = vmemdup_user(addrs, addrs_size); + kaddrs = memdup_user(addrs, addrs_size); if (unlikely(IS_ERR(kaddrs))) return PTR_ERR(kaddrs); @@ -1349,7 +1349,7 @@ static int __sctp_setsockopt_connectx(struct sock *sk, err = __sctp_connect(sk, kaddrs, addrs_size, flags, assoc_id); out_free: - kvfree(kaddrs); + kfree(kaddrs); return err; } -- cgit From 89dc891792c2e046b030f87600109c22209da32e Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Tue, 12 Mar 2019 18:33:46 +0100 Subject: irqchip/gic-v3-its: Fix comparison logic in lpi_range_cmp The lpi_range_list is supposed to be sorted in ascending order of ->base_id (at least if the range merging is to work), but the current comparison function returns a positive value if rb->base_id > ra->base_id, which means that list_sort() will put A after B in that case - and vice versa, of course. Fixes: 880cb3cddd16 (irqchip/gic-v3-its: Refactor LPI allocator) Cc: stable@vger.kernel.org (v4.19+) Signed-off-by: Rasmus Villemoes Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-gic-v3-its.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index fb7157188294..783810716015 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -1476,7 +1476,7 @@ static int lpi_range_cmp(void *priv, struct list_head *a, struct list_head *b) ra = container_of(a, struct lpi_range, entry); rb = container_of(b, struct lpi_range, entry); - return rb->base_id - ra->base_id; + return ra->base_id - rb->base_id; } static void merge_lpi_ranges(void) -- cgit From 0e83fc61eee62979260f6aeadd23ee8b615ee1a2 Mon Sep 17 00:00:00 2001 From: Luo Jiaxing Date: Wed, 20 Mar 2019 18:21:34 +0800 Subject: scsi: hisi_sas: Add softreset in hisi_sas_I_T_nexus_reset() We found out that for v2 hw, a SATA disk can not be written to after the system comes up. In commit ffb1c820b8b6 ("scsi: hisi_sas: remove the check of sas_dev status in hisi_sas_I_T_nexus_reset()"), we introduced a path where we may issue an internal abort for a SATA device, but without following it with a softreset. We need to always follow an internal abort with a software reset, as per HW programming flow, so add this. Fixes: ffb1c820b8b6 ("scsi: hisi_sas: remove the check of sas_dev status in hisi_sas_I_T_nexus_reset()") Signed-off-by: Luo Jiaxing Signed-off-by: John Garry Signed-off-by: Martin K. Petersen --- drivers/scsi/hisi_sas/hisi_sas_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 3c3cf89f713f..14bac4966c87 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1801,6 +1801,12 @@ static int hisi_sas_I_T_nexus_reset(struct domain_device *device) } hisi_sas_dereg_device(hisi_hba, device); + if (dev_is_sata(device)) { + rc = hisi_sas_softreset_ata_disk(device); + if (rc) + return TMF_RESP_FUNC_FAILED; + } + rc = hisi_sas_debug_I_T_nexus_reset(device); if ((rc == TMF_RESP_FUNC_COMPLETE) || (rc == -ENODEV)) -- cgit From 0ccc3876e4b2a1559a4dbe3126dda4459d38a83b Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 19 Mar 2019 17:18:13 +0000 Subject: Btrfs: fix assertion failure on fsync with NO_HOLES enabled Back in commit a89ca6f24ffe4 ("Btrfs: fix fsync after truncate when no_holes feature is enabled") I added an assertion that is triggered when an inline extent is found to assert that the length of the (uncompressed) data the extent represents is the same as the i_size of the inode, since that is true most of the time I couldn't find or didn't remembered about any exception at that time. Later on the assertion was expanded twice to deal with a case of a compressed inline extent representing a range that matches the sector size followed by an expanding truncate, and another case where fallocate can update the i_size of the inode without adding or updating existing extents (if the fallocate range falls entirely within the first block of the file). These two expansion/fixes of the assertion were done by commit 7ed586d0a8241 ("Btrfs: fix assertion on fsync of regular file when using no-holes feature") and commit 6399fb5a0b69a ("Btrfs: fix assertion failure during fsync in no-holes mode"). These however missed the case where an falloc expands the i_size of an inode to exactly the sector size and inline extent exists, for example: $ mkfs.btrfs -f -O no-holes /dev/sdc $ mount /dev/sdc /mnt $ xfs_io -f -c "pwrite -S 0xab 0 1096" /mnt/foobar wrote 1096/1096 bytes at offset 0 1 KiB, 1 ops; 0.0002 sec (4.448 MiB/sec and 4255.3191 ops/sec) $ xfs_io -c "falloc 1096 3000" /mnt/foobar $ xfs_io -c "fsync" /mnt/foobar Segmentation fault $ dmesg [701253.602385] assertion failed: len == i_size || (len == fs_info->sectorsize && btrfs_file_extent_compression(leaf, extent) != BTRFS_COMPRESS_NONE) || (len < i_size && i_size < fs_info->sectorsize), file: fs/btrfs/tree-log.c, line: 4727 [701253.602962] ------------[ cut here ]------------ [701253.603224] kernel BUG at fs/btrfs/ctree.h:3533! [701253.603503] invalid opcode: 0000 [#1] SMP DEBUG_PAGEALLOC PTI [701253.603774] CPU: 2 PID: 7192 Comm: xfs_io Tainted: G W 5.0.0-rc8-btrfs-next-45 #1 [701253.604054] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.11.2-0-gf9626ccb91-prebuilt.qemu-project.org 04/01/2014 [701253.604650] RIP: 0010:assfail.constprop.23+0x18/0x1a [btrfs] (...) [701253.605591] RSP: 0018:ffffbb48c186bc48 EFLAGS: 00010286 [701253.605914] RAX: 00000000000000de RBX: ffff921d0a7afc08 RCX: 0000000000000000 [701253.606244] RDX: 0000000000000000 RSI: ffff921d36b16868 RDI: ffff921d36b16868 [701253.606580] RBP: ffffbb48c186bcf0 R08: 0000000000000000 R09: 0000000000000000 [701253.606913] R10: 0000000000000003 R11: 0000000000000000 R12: ffff921d05d2de18 [701253.607247] R13: ffff921d03b54000 R14: 0000000000000448 R15: ffff921d059ecf80 [701253.607769] FS: 00007f14da906700(0000) GS:ffff921d36b00000(0000) knlGS:0000000000000000 [701253.608163] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [701253.608516] CR2: 000056087ea9f278 CR3: 00000002268e8001 CR4: 00000000003606e0 [701253.608880] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [701253.609250] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [701253.609608] Call Trace: [701253.609994] btrfs_log_inode+0xdfb/0xe40 [btrfs] [701253.610383] btrfs_log_inode_parent+0x2be/0xa60 [btrfs] [701253.610770] ? do_raw_spin_unlock+0x49/0xc0 [701253.611150] btrfs_log_dentry_safe+0x4a/0x70 [btrfs] [701253.611537] btrfs_sync_file+0x3b2/0x440 [btrfs] [701253.612010] ? do_sysinfo+0xb0/0xf0 [701253.612552] do_fsync+0x38/0x60 [701253.612988] __x64_sys_fsync+0x10/0x20 [701253.613360] do_syscall_64+0x60/0x1b0 [701253.613733] entry_SYSCALL_64_after_hwframe+0x49/0xbe [701253.614103] RIP: 0033:0x7f14da4e66d0 (...) [701253.615250] RSP: 002b:00007fffa670fdb8 EFLAGS: 00000246 ORIG_RAX: 000000000000004a [701253.615647] RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007f14da4e66d0 [701253.616047] RDX: 000056087ea9c260 RSI: 000056087ea9c260 RDI: 0000000000000003 [701253.616450] RBP: 0000000000000001 R08: 0000000000000020 R09: 0000000000000010 [701253.616854] R10: 000000000000009b R11: 0000000000000246 R12: 000056087ea9c260 [701253.617257] R13: 000056087ea9c240 R14: 0000000000000000 R15: 000056087ea9dd10 (...) [701253.619941] ---[ end trace e088d74f132b6da5 ]--- Updating the assertion again to allow for this particular case would result in a meaningless assertion, plus there is currently no risk of logging content that would result in any corruption after a log replay if the size of the data encoded in an inline extent is greater than the inode's i_size (which is not currently possibe either with or without compression), therefore just remove the assertion. CC: stable@vger.kernel.org # 4.4+ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/tree-log.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 960ea49a7a0f..561884f60d35 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4725,15 +4725,8 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, struct btrfs_file_extent_item); if (btrfs_file_extent_type(leaf, extent) == - BTRFS_FILE_EXTENT_INLINE) { - len = btrfs_file_extent_ram_bytes(leaf, extent); - ASSERT(len == i_size || - (len == fs_info->sectorsize && - btrfs_file_extent_compression(leaf, extent) != - BTRFS_COMPRESS_NONE) || - (len < i_size && i_size < fs_info->sectorsize)); + BTRFS_FILE_EXTENT_INLINE) return 0; - } len = btrfs_file_extent_num_bytes(leaf, extent); /* Last extent goes beyond i_size, no need to log a hole. */ -- cgit From 8a1b1718214cfd945fef14b3031e4e7262882a86 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 11 Mar 2019 22:30:48 -0700 Subject: perf build: Check what binutils's 'disassembler()' signature to use Commit 003ca0fd2286 ("Refactor disassembler selection") in the binutils repo, which changed the disassembler() function signature, so we must use the feature test introduced in fb982666e380 ("tools/bpftool: fix bpftool build with bintutils >= 2.9") to deal with that. Committer testing: After adding the missing function call to test-all.c, and: FEATURE_CHECK_LDFLAGS-disassembler-four-args = -bfd -lopcodes And the fallbacks for cases where we need -liberty and sometimes -lz to tools/perf/Makefile.config, we get: $ make -C tools/perf O=/tmp/build/perf install-bin make: Entering directory '/home/acme/git/perf/tools/perf' BUILD: Doing 'make -j8' parallel build Auto-detecting system features: ... dwarf: [ on ] ... dwarf_getlocations: [ on ] ... glibc: [ on ] ... gtk2: [ on ] ... libaudit: [ on ] ... libbfd: [ on ] ... libelf: [ on ] ... libnuma: [ on ] ... numa_num_possible_cpus: [ on ] ... libperl: [ on ] ... libpython: [ on ] ... libslang: [ on ] ... libcrypto: [ on ] ... libunwind: [ on ] ... libdw-dwarf-unwind: [ on ] ... zlib: [ on ] ... lzma: [ on ] ... get_cpuid: [ on ] ... bpf: [ on ] ... libaio: [ on ] ... disassembler-four-args: [ on ] CC /tmp/build/perf/jvmti/libjvmti.o CC /tmp/build/perf/builtin-bench.o $ $ The feature detection test-all.bin gets successfully built and linked: $ ls -la /tmp/build/perf/feature/test-all.bin -rwxrwxr-x. 1 acme acme 2680352 Mar 19 11:07 /tmp/build/perf/feature/test-all.bin $ nm /tmp/build/perf/feature/test-all.bin | grep -w disassembler 0000000000061f90 T disassembler $ Time to move on to the patches that make use of this disassembler() routine in binutils's libopcodes. Signed-off-by: Song Liu Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jakub Kicinski Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Roman Gushchin Cc: Stanislav Fomichev Link: http://lkml.kernel.org/r/20190312053051.2690567-13-songliubraving@fb.com [ split from a larger patch, added missing FEATURE_CHECK_LDFLAGS-disassembler-four-args ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 6 ++++-- tools/build/feature/test-all.c | 5 +++++ tools/perf/Makefile.config | 9 +++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index 61e46d54a67c..8d3864b061f3 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -66,7 +66,8 @@ FEATURE_TESTS_BASIC := \ sched_getcpu \ sdt \ setns \ - libaio + libaio \ + disassembler-four-args # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list # of all feature tests @@ -118,7 +119,8 @@ FEATURE_DISPLAY ?= \ lzma \ get_cpuid \ bpf \ - libaio + libaio \ + disassembler-four-args # Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features. # If in the future we need per-feature checks/flags for features not diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index e903b86b742f..7853e6d91090 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -178,6 +178,10 @@ # include "test-reallocarray.c" #undef main +#define main main_test_disassembler_four_args +# include "test-disassembler-four-args.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -219,6 +223,7 @@ int main(int argc, char *argv[]) main_test_setns(); main_test_libaio(); main_test_reallocarray(); + main_test_disassembler_four_args(); return 0; } diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index df4ad45599ca..fe3f97e342fa 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -227,6 +227,8 @@ FEATURE_CHECK_LDFLAGS-libpython-version := $(PYTHON_EMBED_LDOPTS) FEATURE_CHECK_LDFLAGS-libaio = -lrt +FEATURE_CHECK_LDFLAGS-disassembler-four-args = -lbfd -lopcodes + CFLAGS += -fno-omit-frame-pointer CFLAGS += -ggdb3 CFLAGS += -funwind-tables @@ -725,11 +727,14 @@ else ifeq ($(feature-libbfd-liberty), 1) EXTLIBS += -lbfd -lopcodes -liberty + FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -ldl else ifeq ($(feature-libbfd-liberty-z), 1) EXTLIBS += -lbfd -lopcodes -liberty -lz + FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -lz -ldl endif endif + $(call feature_check,disassembler-four-args) endif ifdef NO_DEMANGLE @@ -808,6 +813,10 @@ ifdef HAVE_KVM_STAT_SUPPORT CFLAGS += -DHAVE_KVM_STAT_SUPPORT endif +ifeq ($(feature-disassembler-four-args), 1) + CFLAGS += -DDISASM_FOUR_ARGS_SIGNATURE +endif + ifeq (${IS_64_BIT}, 1) ifndef NO_PERF_READ_VDSO32 $(call feature_check,compile-32) -- cgit From 6987561c9e86eace45f2dbb0c564964a63f4150a Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 11 Mar 2019 22:30:48 -0700 Subject: perf annotate: Enable annotation of BPF programs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In symbol__disassemble(), DSO_BINARY_TYPE__BPF_PROG_INFO dso calls into a new function symbol__disassemble_bpf(), where annotation line information is filled based on the bpf_prog_info and btf data saved in given perf_env. symbol__disassemble_bpf() uses binutils's libopcodes to disassemble bpf programs. Committer testing: After fixing this: - u64 *addrs = (u64 *)(info_linear->info.jited_ksyms); + u64 *addrs = (u64 *)(uintptr_t)(info_linear->info.jited_ksyms); Detected when crossbuilding to a 32-bit arch. And making all this dependent on HAVE_LIBBFD_SUPPORT and HAVE_LIBBPF_SUPPORT: 1) Have a BPF program running, one that has BTF info, etc, I used the tools/perf/examples/bpf/augmented_raw_syscalls.c put in place by 'perf trace'. # grep -B1 augmented_raw ~/.perfconfig [trace] add_events = /home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c # # perf trace -e *mmsg dnf/6245 sendmmsg(20, 0x7f5485a88030, 2, MSG_NOSIGNAL) = 2 NetworkManager/10055 sendmmsg(22, 0x7f8126ad1bb0, 2, MSG_NOSIGNAL) = 2 2) Then do a 'perf record' system wide for a while: # perf record -a ^C[ perf record: Woken up 68 times to write data ] [ perf record: Captured and wrote 19.427 MB perf.data (366891 samples) ] # 3) Check that we captured BPF and BTF info in the perf.data file: # perf report --header-only | grep 'b[pt]f' # event : name = cycles:ppp, , id = { 294789, 294790, 294791, 294792, 294793, 294794, 294795, 294796 }, size = 112, { sample_period, sample_freq } = 4000, sample_type = IP|TID|TIME|CPU|PERIOD, read_format = ID, disabled = 1, inherit = 1, mmap = 1, comm = 1, freq = 1, task = 1, precise_ip = 3, sample_id_all = 1, exclude_guest = 1, mmap2 = 1, comm_exec = 1, ksymbol = 1, bpf_event = 1 # bpf_prog_info of id 13 # bpf_prog_info of id 14 # bpf_prog_info of id 15 # bpf_prog_info of id 16 # bpf_prog_info of id 17 # bpf_prog_info of id 18 # bpf_prog_info of id 21 # bpf_prog_info of id 22 # bpf_prog_info of id 41 # bpf_prog_info of id 42 # btf info of id 2 # 4) Check which programs got recorded: # perf report | grep bpf_prog | head 0.16% exe bpf_prog_819967866022f1e1_sys_enter [k] bpf_prog_819967866022f1e1_sys_enter 0.14% exe bpf_prog_c1bd85c092d6e4aa_sys_exit [k] bpf_prog_c1bd85c092d6e4aa_sys_exit 0.08% fuse-overlayfs bpf_prog_819967866022f1e1_sys_enter [k] bpf_prog_819967866022f1e1_sys_enter 0.07% fuse-overlayfs bpf_prog_c1bd85c092d6e4aa_sys_exit [k] bpf_prog_c1bd85c092d6e4aa_sys_exit 0.01% clang-4.0 bpf_prog_c1bd85c092d6e4aa_sys_exit [k] bpf_prog_c1bd85c092d6e4aa_sys_exit 0.01% clang-4.0 bpf_prog_819967866022f1e1_sys_enter [k] bpf_prog_819967866022f1e1_sys_enter 0.00% clang bpf_prog_c1bd85c092d6e4aa_sys_exit [k] bpf_prog_c1bd85c092d6e4aa_sys_exit 0.00% runc bpf_prog_819967866022f1e1_sys_enter [k] bpf_prog_819967866022f1e1_sys_enter 0.00% clang bpf_prog_819967866022f1e1_sys_enter [k] bpf_prog_819967866022f1e1_sys_enter 0.00% sh bpf_prog_c1bd85c092d6e4aa_sys_exit [k] bpf_prog_c1bd85c092d6e4aa_sys_exit # This was with the default --sort order for 'perf report', which is: --sort comm,dso,symbol If we just look for the symbol, for instance: # perf report --sort symbol | grep bpf_prog | head 0.26% [k] bpf_prog_819967866022f1e1_sys_enter - - 0.24% [k] bpf_prog_c1bd85c092d6e4aa_sys_exit - - # or the DSO: # perf report --sort dso | grep bpf_prog | head 0.26% bpf_prog_819967866022f1e1_sys_enter 0.24% bpf_prog_c1bd85c092d6e4aa_sys_exit # We'll see the two BPF programs that augmented_raw_syscalls.o puts in place, one attached to the raw_syscalls:sys_enter and another to the raw_syscalls:sys_exit tracepoints, as expected. Now we can finally do, from the command line, annotation for one of those two symbols, with the original BPF program source coude intermixed with the disassembled JITed code: # perf annotate --stdio2 bpf_prog_819967866022f1e1_sys_enter Samples: 950 of event 'cycles:ppp', 4000 Hz, Event count (approx.): 553756947, [percent: local period] bpf_prog_819967866022f1e1_sys_enter() bpf_prog_819967866022f1e1_sys_enter Percent int sys_enter(struct syscall_enter_args *args) 53.41 push %rbp 0.63 mov %rsp,%rbp 0.31 sub $0x170,%rsp 1.93 sub $0x28,%rbp 7.02 mov %rbx,0x0(%rbp) 3.20 mov %r13,0x8(%rbp) 1.07 mov %r14,0x10(%rbp) 0.61 mov %r15,0x18(%rbp) 0.11 xor %eax,%eax 1.29 mov %rax,0x20(%rbp) 0.11 mov %rdi,%rbx return bpf_get_current_pid_tgid(); 2.02 → callq *ffffffffda6776d9 2.76 mov %eax,-0x148(%rbp) mov %rbp,%rsi int sys_enter(struct syscall_enter_args *args) add $0xfffffffffffffeb8,%rsi return bpf_map_lookup_elem(pids, &pid) != NULL; movabs $0xffff975ac2607800,%rdi 1.26 → callq *ffffffffda6789e9 cmp $0x0,%rax 2.43 → je 0 add $0x38,%rax 0.21 xor %r13d,%r13d if (pid_filter__has(&pids_filtered, getpid())) 0.81 cmp $0x0,%rax → jne 0 mov %rbp,%rdi probe_read(&augmented_args.args, sizeof(augmented_args.args), args); 2.22 add $0xfffffffffffffeb8,%rdi 0.11 mov $0x40,%esi 0.32 mov %rbx,%rdx 2.74 → callq *ffffffffda658409 syscall = bpf_map_lookup_elem(&syscalls, &augmented_args.args.syscall_nr); 0.22 mov %rbp,%rsi 1.69 add $0xfffffffffffffec0,%rsi syscall = bpf_map_lookup_elem(&syscalls, &augmented_args.args.syscall_nr); movabs $0xffff975bfcd36000,%rdi add $0xd0,%rdi 0.21 mov 0x0(%rsi),%eax 0.93 cmp $0x200,%rax → jae 0 0.10 shl $0x3,%rax 0.11 add %rdi,%rax 0.11 → jmp 0 xor %eax,%eax if (syscall == NULL || !syscall->enabled) 1.07 cmp $0x0,%rax → je 0 if (syscall == NULL || !syscall->enabled) 6.57 movzbq 0x0(%rax),%rdi if (syscall == NULL || !syscall->enabled) cmp $0x0,%rdi 0.95 → je 0 mov $0x40,%r8d switch (augmented_args.args.syscall_nr) { mov -0x140(%rbp),%rdi switch (augmented_args.args.syscall_nr) { cmp $0x2,%rdi → je 0 cmp $0x101,%rdi → je 0 cmp $0x15,%rdi → jne 0 case SYS_OPEN: filename_arg = (const void *)args->args[0]; mov 0x10(%rbx),%rdx → jmp 0 case SYS_OPENAT: filename_arg = (const void *)args->args[1]; mov 0x18(%rbx),%rdx if (filename_arg != NULL) { cmp $0x0,%rdx → je 0 xor %edi,%edi augmented_args.filename.reserved = 0; mov %edi,-0x104(%rbp) augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, mov %rbp,%rdi add $0xffffffffffffff00,%rdi augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, mov $0x100,%esi → callq *ffffffffda658499 mov $0x148,%r8d augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, mov %eax,-0x108(%rbp) augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, mov %rax,%rdi shl $0x20,%rdi shr $0x20,%rdi if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) { cmp $0xff,%rdi → ja 0 len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; add $0x48,%rax len &= sizeof(augmented_args.filename.value) - 1; and $0xff,%rax mov %rax,%r8 mov %rbp,%rcx return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len); add $0xfffffffffffffeb8,%rcx mov %rbx,%rdi movabs $0xffff975fbd72d800,%rsi mov $0xffffffff,%edx → callq *ffffffffda658ad9 mov %rax,%r13 } mov %r13,%rax 0.72 mov 0x0(%rbp),%rbx mov 0x8(%rbp),%r13 1.16 mov 0x10(%rbp),%r14 0.10 mov 0x18(%rbp),%r15 0.42 add $0x28,%rbp 0.54 leaveq 0.54 ← retq # Please see 'man perf-config' to see how to control what should be seen, via ~/.perfconfig [annotate] section, for instance, one can suppress the source code and see just the disassembly, etc. Alternatively, use the TUI bu just using 'perf annotate', press '/bpf_prog' to see the bpf symbols, press enter and do the interactive annotation, which allows for dumping to a file after selecting the the various output tunables, for instance, the above without source code intermixed, plus showing all the instruction offsets: # perf annotate bpf_prog_819967866022f1e1_sys_enter Then press: 's' to hide the source code + 'O' twice to show all instruction offsets, then 'P' to print to the bpf_prog_819967866022f1e1_sys_enter.annotation file, which will have: # cat bpf_prog_819967866022f1e1_sys_enter.annotation bpf_prog_819967866022f1e1_sys_enter() bpf_prog_819967866022f1e1_sys_enter Event: cycles:ppp 53.41 0: push %rbp 0.63 1: mov %rsp,%rbp 0.31 4: sub $0x170,%rsp 1.93 b: sub $0x28,%rbp 7.02 f: mov %rbx,0x0(%rbp) 3.20 13: mov %r13,0x8(%rbp) 1.07 17: mov %r14,0x10(%rbp) 0.61 1b: mov %r15,0x18(%rbp) 0.11 1f: xor %eax,%eax 1.29 21: mov %rax,0x20(%rbp) 0.11 25: mov %rdi,%rbx 2.02 28: → callq *ffffffffda6776d9 2.76 2d: mov %eax,-0x148(%rbp) 33: mov %rbp,%rsi 36: add $0xfffffffffffffeb8,%rsi 3d: movabs $0xffff975ac2607800,%rdi 1.26 47: → callq *ffffffffda6789e9 4c: cmp $0x0,%rax 2.43 50: → je 0 52: add $0x38,%rax 0.21 56: xor %r13d,%r13d 0.81 59: cmp $0x0,%rax 5d: → jne 0 63: mov %rbp,%rdi 2.22 66: add $0xfffffffffffffeb8,%rdi 0.11 6d: mov $0x40,%esi 0.32 72: mov %rbx,%rdx 2.74 75: → callq *ffffffffda658409 0.22 7a: mov %rbp,%rsi 1.69 7d: add $0xfffffffffffffec0,%rsi 84: movabs $0xffff975bfcd36000,%rdi 8e: add $0xd0,%rdi 0.21 95: mov 0x0(%rsi),%eax 0.93 98: cmp $0x200,%rax 9f: → jae 0 0.10 a1: shl $0x3,%rax 0.11 a5: add %rdi,%rax 0.11 a8: → jmp 0 aa: xor %eax,%eax 1.07 ac: cmp $0x0,%rax b0: → je 0 6.57 b6: movzbq 0x0(%rax),%rdi bb: cmp $0x0,%rdi 0.95 bf: → je 0 c5: mov $0x40,%r8d cb: mov -0x140(%rbp),%rdi d2: cmp $0x2,%rdi d6: → je 0 d8: cmp $0x101,%rdi df: → je 0 e1: cmp $0x15,%rdi e5: → jne 0 e7: mov 0x10(%rbx),%rdx eb: → jmp 0 ed: mov 0x18(%rbx),%rdx f1: cmp $0x0,%rdx f5: → je 0 f7: xor %edi,%edi f9: mov %edi,-0x104(%rbp) ff: mov %rbp,%rdi 102: add $0xffffffffffffff00,%rdi 109: mov $0x100,%esi 10e: → callq *ffffffffda658499 113: mov $0x148,%r8d 119: mov %eax,-0x108(%rbp) 11f: mov %rax,%rdi 122: shl $0x20,%rdi 126: shr $0x20,%rdi 12a: cmp $0xff,%rdi 131: → ja 0 133: add $0x48,%rax 137: and $0xff,%rax 13d: mov %rax,%r8 140: mov %rbp,%rcx 143: add $0xfffffffffffffeb8,%rcx 14a: mov %rbx,%rdi 14d: movabs $0xffff975fbd72d800,%rsi 157: mov $0xffffffff,%edx 15c: → callq *ffffffffda658ad9 161: mov %rax,%r13 164: mov %r13,%rax 0.72 167: mov 0x0(%rbp),%rbx 16b: mov 0x8(%rbp),%r13 1.16 16f: mov 0x10(%rbp),%r14 0.10 173: mov 0x18(%rbp),%r15 0.42 177: add $0x28,%rbp 0.54 17b: leaveq 0.54 17c: ← retq Another cool way to test all this is to symple use 'perf top' look for those symbols, go there and press enter, annotate it live :-) Signed-off-by: Song Liu Reviewed-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stanislav Fomichev Link: http://lkml.kernel.org/r/20190312053051.2690567-13-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 163 +++++++++++++++++++++++++++++++++++++++++++- tools/perf/util/annotate.h | 1 + tools/perf/util/bpf-event.c | 2 +- 3 files changed, 164 insertions(+), 2 deletions(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 5f6dbbf5d749..c8b01176c9e1 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -10,6 +10,10 @@ #include #include #include +#include +#include +#include +#include #include "util.h" #include "ui/ui.h" #include "sort.h" @@ -24,6 +28,7 @@ #include "annotate.h" #include "evsel.h" #include "evlist.h" +#include "bpf-event.h" #include "block-range.h" #include "string2.h" #include "arch/common.h" @@ -31,6 +36,7 @@ #include #include #include +#include /* FIXME: For the HE_COLORSET */ #include "ui/browser.h" @@ -1615,6 +1621,9 @@ int symbol__strerror_disassemble(struct symbol *sym __maybe_unused, struct map * " --vmlinux vmlinux\n", build_id_msg ?: ""); } break; + case SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF: + scnprintf(buf, buflen, "Please link with binutils's libopcode to enable BPF annotation"); + break; default: scnprintf(buf, buflen, "Internal error: Invalid %d error code\n", errnum); break; @@ -1674,6 +1683,156 @@ fallback: return 0; } +#if defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT) +#define PACKAGE "perf" +#include +#include + +static int symbol__disassemble_bpf(struct symbol *sym, + struct annotate_args *args) +{ + struct annotation *notes = symbol__annotation(sym); + struct annotation_options *opts = args->options; + struct bpf_prog_info_linear *info_linear; + struct bpf_prog_linfo *prog_linfo = NULL; + struct bpf_prog_info_node *info_node; + int len = sym->end - sym->start; + disassembler_ftype disassemble; + struct map *map = args->ms.map; + struct disassemble_info info; + struct dso *dso = map->dso; + int pc = 0, count, sub_id; + struct btf *btf = NULL; + char tpath[PATH_MAX]; + size_t buf_size; + int nr_skip = 0; + int ret = -1; + char *buf; + bfd *bfdf; + FILE *s; + + if (dso->binary_type != DSO_BINARY_TYPE__BPF_PROG_INFO) + return -1; + + pr_debug("%s: handling sym %s addr %lx len %lx\n", __func__, + sym->name, sym->start, sym->end - sym->start); + + memset(tpath, 0, sizeof(tpath)); + perf_exe(tpath, sizeof(tpath)); + + bfdf = bfd_openr(tpath, NULL); + assert(bfdf); + assert(bfd_check_format(bfdf, bfd_object)); + + s = open_memstream(&buf, &buf_size); + if (!s) + goto out; + init_disassemble_info(&info, s, + (fprintf_ftype) fprintf); + + info.arch = bfd_get_arch(bfdf); + info.mach = bfd_get_mach(bfdf); + + info_node = perf_env__find_bpf_prog_info(dso->bpf_prog.env, + dso->bpf_prog.id); + if (!info_node) + goto out; + info_linear = info_node->info_linear; + sub_id = dso->bpf_prog.sub_id; + + info.buffer = (void *)(info_linear->info.jited_prog_insns); + info.buffer_length = info_linear->info.jited_prog_len; + + if (info_linear->info.nr_line_info) + prog_linfo = bpf_prog_linfo__new(&info_linear->info); + + if (info_linear->info.btf_id) { + struct btf_node *node; + + node = perf_env__find_btf(dso->bpf_prog.env, + info_linear->info.btf_id); + if (node) + btf = btf__new((__u8 *)(node->data), + node->data_size); + } + + disassemble_init_for_target(&info); + +#ifdef DISASM_FOUR_ARGS_SIGNATURE + disassemble = disassembler(info.arch, + bfd_big_endian(bfdf), + info.mach, + bfdf); +#else + disassemble = disassembler(bfdf); +#endif + assert(disassemble); + + fflush(s); + do { + const struct bpf_line_info *linfo = NULL; + struct disasm_line *dl; + size_t prev_buf_size; + const char *srcline; + u64 addr; + + addr = pc + ((u64 *)(info_linear->info.jited_ksyms))[sub_id]; + count = disassemble(pc, &info); + + if (prog_linfo) + linfo = bpf_prog_linfo__lfind_addr_func(prog_linfo, + addr, sub_id, + nr_skip); + + if (linfo && btf) { + srcline = btf__name_by_offset(btf, linfo->line_off); + nr_skip++; + } else + srcline = NULL; + + fprintf(s, "\n"); + prev_buf_size = buf_size; + fflush(s); + + if (!opts->hide_src_code && srcline) { + args->offset = -1; + args->line = strdup(srcline); + args->line_nr = 0; + args->ms.sym = sym; + dl = disasm_line__new(args); + if (dl) { + annotation_line__add(&dl->al, + ¬es->src->source); + } + } + + args->offset = pc; + args->line = buf + prev_buf_size; + args->line_nr = 0; + args->ms.sym = sym; + dl = disasm_line__new(args); + if (dl) + annotation_line__add(&dl->al, ¬es->src->source); + + pc += count; + } while (count > 0 && pc < len); + + ret = 0; +out: + free(prog_linfo); + free(btf); + fclose(s); + bfd_close(bfdf); + return ret; +} +#else // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT) +static int symbol__disassemble_bpf(struct symbol *sym __maybe_unused, + struct annotate_args *args __maybe_unused) +{ + return SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF; +} +#endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT) + static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { struct annotation_options *opts = args->options; @@ -1701,7 +1860,9 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) pr_debug("annotating [%p] %30s : [%p] %30s\n", dso, dso->long_name, sym, sym->name); - if (dso__is_kcore(dso)) { + if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) { + return symbol__disassemble_bpf(sym, args); + } else if (dso__is_kcore(dso)) { kce.kcore_filename = symfs_filename; kce.addr = map__rip_2objdump(map, sym->start); kce.offs = sym->start; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index df34fe483164..5bc0cf655d37 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -369,6 +369,7 @@ enum symbol_disassemble_errno { __SYMBOL_ANNOTATE_ERRNO__START = -10000, SYMBOL_ANNOTATE_ERRNO__NO_VMLINUX = __SYMBOL_ANNOTATE_ERRNO__START, + SYMBOL_ANNOTATE_ERRNO__NO_LIBOPCODES_FOR_BPF, __SYMBOL_ANNOTATE_ERRNO__END, }; diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 852e960692cb..7ffe7db59828 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -46,7 +46,7 @@ static int machine__process_bpf_event_load(struct machine *machine, info_linear = info_node->info_linear; for (i = 0; i < info_linear->info.nr_jited_ksyms; i++) { - u64 *addrs = (u64 *)(info_linear->info.jited_ksyms); + u64 *addrs = (u64 *)(uintptr_t)(info_linear->info.jited_ksyms); u64 addr = addrs[i]; struct map *map; -- cgit From 536d3680fd2dab5c39857d62a3e084198fc74ff9 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 20 Mar 2019 15:02:00 +0100 Subject: net: ks8851: Dequeue RX packets explicitly The ks8851 driver lets the chip auto-dequeue received packets once they have been read in full. It achieves that by setting the ADRFE flag in the RXQCR register ("Auto-Dequeue RXQ Frame Enable"). However if allocation of a packet's socket buffer or retrieval of the packet over the SPI bus fails, the packet will not have been read in full and is not auto-dequeued. Such partial retrieval of a packet confuses the chip's RX queue management: On the next RX interrupt, the first packet read from the queue will be the one left there previously and this one can be retrieved without issues. But for any newly received packets, the frame header status and byte count registers (RXFHSR and RXFHBCR) contain bogus values, preventing their retrieval. The chip allows explicitly dequeueing a packet from the RX queue by setting the RRXEF flag in the RXQCR register ("Release RX Error Frame"). This could be used to dequeue the packet in case of an error, but if that error is a failed SPI transfer, it is unknown if the packet was transferred in full and was auto-dequeued or if it was only transferred in part and requires an explicit dequeue. The safest approach is thus to always dequeue packets explicitly and forgo auto-dequeueing. Without this change, I've witnessed packet retrieval break completely when an SPI DMA transfer fails, requiring a chip reset. Explicit dequeueing magically fixes this and makes packet retrieval absolutely robust for me. The chip's documentation suggests auto-dequeuing and uses the RRXEF flag only to dequeue error frames which the driver doesn't want to retrieve. But that seems to be a fair-weather approach. Signed-off-by: Lukas Wunner Cc: Frank Pavlic Cc: Ben Dooks Cc: Tristram Ha Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index bd6e9014bc74..a93f8e842c07 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -535,9 +535,8 @@ static void ks8851_rx_pkts(struct ks8851_net *ks) /* set dma read address */ ks8851_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI | 0x00); - /* start the packet dma process, and set auto-dequeue rx */ - ks8851_wrreg16(ks, KS_RXQCR, - ks->rc_rxqcr | RXQCR_SDA | RXQCR_ADRFE); + /* start DMA access */ + ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA); if (rxlen > 4) { unsigned int rxalign; @@ -568,7 +567,8 @@ static void ks8851_rx_pkts(struct ks8851_net *ks) } } - ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr); + /* end DMA access and dequeue packet */ + ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_RRXEF); } } -- cgit From 761cfa979a0c177d6c2d93ef5585cd79ae49a7d5 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 20 Mar 2019 15:02:00 +0100 Subject: net: ks8851: Reassert reset pin if chip ID check fails Commit 73fdeb82e963 ("net: ks8851: Add optional vdd_io regulator and reset gpio") amended the ks8851 driver to briefly assert the chip's reset pin on probe. It also amended the probe routine's error path to reassert the reset pin if a subsequent initialization step fails. However the commit misplaced reassertion of the reset pin in the error path such that it is not performed if the check of the Chip ID and Enable Register (CIDER) fails. The error path is therefore slightly asymmetrical to the probe routine's body. Fix it. Signed-off-by: Lukas Wunner Cc: Frank Pavlic Cc: Stephen Boyd Cc: Nishanth Menon Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index a93f8e842c07..1633fa5c709c 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -1554,9 +1554,9 @@ err_netdev: free_irq(ndev->irq, ks); err_irq: +err_id: if (gpio_is_valid(gpio)) gpio_set_value(gpio, 0); -err_id: regulator_disable(ks->vdd_reg); err_reg: regulator_disable(ks->vdd_io); -- cgit From d268f31552794abf5b6aa5af31021643411f25f5 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 20 Mar 2019 15:02:00 +0100 Subject: net: ks8851: Delay requesting IRQ until opened The ks8851 driver currently requests the IRQ before registering the net_device. Because the net_device name is used as IRQ name and is still "eth%d" when the IRQ is requested, it's impossibe to tell IRQs apart if multiple ks8851 chips are present. Most other drivers delay requesting the IRQ until the net_device is opened. Do the same. The driver doesn't enable interrupts on the chip before opening the net_device and disables them when closing it, so there doesn't seem to be a need to request the IRQ already on probe. Signed-off-by: Lukas Wunner Cc: Frank Pavlic Cc: Ben Dooks Cc: Tristram Ha Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index 1633fa5c709c..c9faec4c5b25 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -785,6 +785,15 @@ static void ks8851_tx_work(struct work_struct *work) static int ks8851_net_open(struct net_device *dev) { struct ks8851_net *ks = netdev_priv(dev); + int ret; + + ret = request_threaded_irq(dev->irq, NULL, ks8851_irq, + IRQF_TRIGGER_LOW | IRQF_ONESHOT, + dev->name, ks); + if (ret < 0) { + netdev_err(dev, "failed to get irq\n"); + return ret; + } /* lock the card, even if we may not actually be doing anything * else at the moment */ @@ -899,6 +908,8 @@ static int ks8851_net_stop(struct net_device *dev) dev_kfree_skb(txb); } + free_irq(dev->irq, ks); + return 0; } @@ -1529,14 +1540,6 @@ static int ks8851_probe(struct spi_device *spi) ks8851_read_selftest(ks); ks8851_init_mac(ks); - ret = request_threaded_irq(spi->irq, NULL, ks8851_irq, - IRQF_TRIGGER_LOW | IRQF_ONESHOT, - ndev->name, ks); - if (ret < 0) { - dev_err(&spi->dev, "failed to get irq\n"); - goto err_irq; - } - ret = register_netdev(ndev); if (ret) { dev_err(&spi->dev, "failed to register network device\n"); @@ -1549,11 +1552,7 @@ static int ks8851_probe(struct spi_device *spi) return 0; - err_netdev: - free_irq(ndev->irq, ks); - -err_irq: err_id: if (gpio_is_valid(gpio)) gpio_set_value(gpio, 0); @@ -1574,7 +1573,6 @@ static int ks8851_remove(struct spi_device *spi) dev_info(&spi->dev, "remove\n"); unregister_netdev(priv->netdev); - free_irq(spi->irq, priv); if (gpio_is_valid(priv->gpio)) gpio_set_value(priv->gpio, 0); regulator_disable(priv->vdd_reg); -- cgit From 9624bafa5f6418b9ca5b3f66d1f6a6a2e8bf6d4c Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 20 Mar 2019 15:02:00 +0100 Subject: net: ks8851: Set initial carrier state to down The ks8851 chip's initial carrier state is down. A Link Change Interrupt is signaled once interrupts are enabled if the carrier is up. The ks8851 driver has it backwards by assuming that the initial carrier state is up. The state is therefore misrepresented if the interface is opened with no cable attached. Fix it. The Link Change interrupt is sometimes not signaled unless the P1MBSR register (which contains the Link Status bit) is read on ->ndo_open(). This might be a hardware erratum. Read the register by calling mii_check_link(), which has the desirable side effect of setting the carrier state to down if the cable was detached while the interface was closed. Signed-off-by: Lukas Wunner Cc: Frank Pavlic Cc: Ben Dooks Cc: Tristram Ha Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index c9faec4c5b25..b83b070a9eec 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -858,6 +858,7 @@ static int ks8851_net_open(struct net_device *dev) netif_dbg(ks, ifup, ks->netdev, "network device up\n"); mutex_unlock(&ks->lock); + mii_check_link(&ks->mii); return 0; } @@ -1519,6 +1520,7 @@ static int ks8851_probe(struct spi_device *spi) spi_set_drvdata(spi, ks); + netif_carrier_off(ks->netdev); ndev->if_port = IF_PORT_100BASET; ndev->netdev_ops = &ks8851_netdev_ops; ndev->irq = spi->irq; -- cgit From cbda74a12c4b738feb90752fbca3648d24646079 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 20 Mar 2019 15:02:00 +0100 Subject: net: ks8851: Fix register macro misnomers In the header file accompanying the ks8851 driver, the P1SCLMD register macros are misnamed, they actually pertain to the P1CR register. The P1CR macros in turn pertain to the P1SR register, see pages 65 to 68 of the spec: http://www.hqchip.com/uploads/pdf/201703/47c98946d6c97a4766e14db3f24955f2.pdf The misnomers have no negative consequences so far because the macros aren't used by ks8851.c, but that's about to change. Signed-off-by: Lukas Wunner Cc: Frank Pavlic Cc: Ben Dooks Cc: Tristram Ha Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851.h | 52 +++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851.h b/drivers/net/ethernet/micrel/ks8851.h index 852256ef1f22..1050fa48b73c 100644 --- a/drivers/net/ethernet/micrel/ks8851.h +++ b/drivers/net/ethernet/micrel/ks8851.h @@ -257,33 +257,35 @@ #define KS_P1ANLPR 0xEE #define KS_P1SCLMD 0xF4 -#define P1SCLMD_LEDOFF (1 << 15) -#define P1SCLMD_TXIDS (1 << 14) -#define P1SCLMD_RESTARTAN (1 << 13) -#define P1SCLMD_DISAUTOMDIX (1 << 10) -#define P1SCLMD_FORCEMDIX (1 << 9) -#define P1SCLMD_AUTONEGEN (1 << 7) -#define P1SCLMD_FORCE100 (1 << 6) -#define P1SCLMD_FORCEFDX (1 << 5) -#define P1SCLMD_ADV_FLOW (1 << 4) -#define P1SCLMD_ADV_100BT_FDX (1 << 3) -#define P1SCLMD_ADV_100BT_HDX (1 << 2) -#define P1SCLMD_ADV_10BT_FDX (1 << 1) -#define P1SCLMD_ADV_10BT_HDX (1 << 0) #define KS_P1CR 0xF6 -#define P1CR_HP_MDIX (1 << 15) -#define P1CR_REV_POL (1 << 13) -#define P1CR_OP_100M (1 << 10) -#define P1CR_OP_FDX (1 << 9) -#define P1CR_OP_MDI (1 << 7) -#define P1CR_AN_DONE (1 << 6) -#define P1CR_LINK_GOOD (1 << 5) -#define P1CR_PNTR_FLOW (1 << 4) -#define P1CR_PNTR_100BT_FDX (1 << 3) -#define P1CR_PNTR_100BT_HDX (1 << 2) -#define P1CR_PNTR_10BT_FDX (1 << 1) -#define P1CR_PNTR_10BT_HDX (1 << 0) +#define P1CR_LEDOFF (1 << 15) +#define P1CR_TXIDS (1 << 14) +#define P1CR_RESTARTAN (1 << 13) +#define P1CR_DISAUTOMDIX (1 << 10) +#define P1CR_FORCEMDIX (1 << 9) +#define P1CR_AUTONEGEN (1 << 7) +#define P1CR_FORCE100 (1 << 6) +#define P1CR_FORCEFDX (1 << 5) +#define P1CR_ADV_FLOW (1 << 4) +#define P1CR_ADV_100BT_FDX (1 << 3) +#define P1CR_ADV_100BT_HDX (1 << 2) +#define P1CR_ADV_10BT_FDX (1 << 1) +#define P1CR_ADV_10BT_HDX (1 << 0) + +#define KS_P1SR 0xF8 +#define P1SR_HP_MDIX (1 << 15) +#define P1SR_REV_POL (1 << 13) +#define P1SR_OP_100M (1 << 10) +#define P1SR_OP_FDX (1 << 9) +#define P1SR_OP_MDI (1 << 7) +#define P1SR_AN_DONE (1 << 6) +#define P1SR_LINK_GOOD (1 << 5) +#define P1SR_PNTR_FLOW (1 << 4) +#define P1SR_PNTR_100BT_FDX (1 << 3) +#define P1SR_PNTR_100BT_HDX (1 << 2) +#define P1SR_PNTR_10BT_FDX (1 << 1) +#define P1SR_PNTR_10BT_HDX (1 << 0) /* TX Frame control */ -- cgit From aae079aa76d0cc3e679db31370364cb87a405651 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 20 Mar 2019 15:02:00 +0100 Subject: net: ks8851: Deduplicate register macros The ks8851 chip is sold either with an SPI interface (KSZ8851SNL) or with a so-called non-PCI interface (KSZ8851-16MLL). When the driver for the latter was introduced with commit a55c0a0ed415 ("drivers/net: ks8851_mll ethernet network driver"), it duplicated the register macros introduced by the driver for the former with commit 3ba81f3ece3c ("net: Micrel KS8851 SPI network driver"). The chips are almost identical, so the duplication seems unwarranted. There are a handful of bits which are in use on the KSZ8851-16MLL but reserved on the KSZ8851SNL, and vice-versa, but there are no actual collisions. Thus, remove the duplicate definitions from the KSZ8851-16MLL driver. Mark all bits which differ between the two chips. Move the SPI frame opcodes, which are specific to KSZ8851SNL, to its driver. The KSZ8851-16MLL driver added a RXFCTR_THRESHOLD_MASK macro which is a duplication of the RXFCTR_RXFCT_MASK macro, rename it where it's used. Same for P1MBCR_FORCE_FDX, which duplicates the BMCR_FULLDPLX macro and OBCR_ODS_16MA, which duplicates OBCR_ODS_16mA. Signed-off-by: Lukas Wunner Cc: Frank Pavlic Cc: Ben Dooks Cc: Tristram Ha Signed-off-by: David S. Miller --- drivers/net/ethernet/micrel/ks8851.c | 6 + drivers/net/ethernet/micrel/ks8851.h | 41 ++-- drivers/net/ethernet/micrel/ks8851_mll.c | 317 +------------------------------ 3 files changed, 34 insertions(+), 330 deletions(-) diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c index b83b070a9eec..7849119d407a 100644 --- a/drivers/net/ethernet/micrel/ks8851.c +++ b/drivers/net/ethernet/micrel/ks8851.c @@ -142,6 +142,12 @@ struct ks8851_net { static int msg_enable; +/* SPI frame opcodes */ +#define KS_SPIOP_RD (0x00) +#define KS_SPIOP_WR (0x40) +#define KS_SPIOP_RXFIFO (0x80) +#define KS_SPIOP_TXFIFO (0xC0) + /* shift for byte-enable data */ #define BYTE_EN(_x) ((_x) << 2) diff --git a/drivers/net/ethernet/micrel/ks8851.h b/drivers/net/ethernet/micrel/ks8851.h index 1050fa48b73c..23da1e3ee429 100644 --- a/drivers/net/ethernet/micrel/ks8851.h +++ b/drivers/net/ethernet/micrel/ks8851.h @@ -11,9 +11,15 @@ */ #define KS_CCR 0x08 +#define CCR_LE (1 << 10) /* KSZ8851-16MLL */ #define CCR_EEPROM (1 << 9) -#define CCR_SPI (1 << 8) -#define CCR_32PIN (1 << 0) +#define CCR_SPI (1 << 8) /* KSZ8851SNL */ +#define CCR_8BIT (1 << 7) /* KSZ8851-16MLL */ +#define CCR_16BIT (1 << 6) /* KSZ8851-16MLL */ +#define CCR_32BIT (1 << 5) /* KSZ8851-16MLL */ +#define CCR_SHARED (1 << 4) /* KSZ8851-16MLL */ +#define CCR_48PIN (1 << 1) /* KSZ8851-16MLL */ +#define CCR_32PIN (1 << 0) /* KSZ8851SNL */ /* MAC address registers */ #define KS_MAR(_m) (0x15 - (_m)) @@ -112,13 +118,13 @@ #define RXCR1_RXE (1 << 0) #define KS_RXCR2 0x76 -#define RXCR2_SRDBL_MASK (0x7 << 5) -#define RXCR2_SRDBL_SHIFT (5) -#define RXCR2_SRDBL_4B (0x0 << 5) -#define RXCR2_SRDBL_8B (0x1 << 5) -#define RXCR2_SRDBL_16B (0x2 << 5) -#define RXCR2_SRDBL_32B (0x3 << 5) -#define RXCR2_SRDBL_FRAME (0x4 << 5) +#define RXCR2_SRDBL_MASK (0x7 << 5) /* KSZ8851SNL */ +#define RXCR2_SRDBL_SHIFT (5) /* KSZ8851SNL */ +#define RXCR2_SRDBL_4B (0x0 << 5) /* KSZ8851SNL */ +#define RXCR2_SRDBL_8B (0x1 << 5) /* KSZ8851SNL */ +#define RXCR2_SRDBL_16B (0x2 << 5) /* KSZ8851SNL */ +#define RXCR2_SRDBL_32B (0x3 << 5) /* KSZ8851SNL */ +#define RXCR2_SRDBL_FRAME (0x4 << 5) /* KSZ8851SNL */ #define RXCR2_IUFFP (1 << 4) #define RXCR2_RXIUFCEZ (1 << 3) #define RXCR2_UDPLFE (1 << 2) @@ -143,8 +149,10 @@ #define RXFSHR_RXCE (1 << 0) #define KS_RXFHBCR 0x7E +#define RXFHBCR_CNT_MASK (0xfff << 0) + #define KS_TXQCR 0x80 -#define TXQCR_AETFE (1 << 2) +#define TXQCR_AETFE (1 << 2) /* KSZ8851SNL */ #define TXQCR_TXQMAM (1 << 1) #define TXQCR_METFE (1 << 0) @@ -167,6 +175,10 @@ #define KS_RXFDPR 0x86 #define RXFDPR_RXFPAI (1 << 14) +#define RXFDPR_WST (1 << 12) /* KSZ8851-16MLL */ +#define RXFDPR_EMS (1 << 11) /* KSZ8851-16MLL */ +#define RXFDPR_RXFP_MASK (0x7ff << 0) +#define RXFDPR_RXFP_SHIFT (0) #define KS_RXDTTR 0x8C #define KS_RXDBCTR 0x8E @@ -184,7 +196,7 @@ #define IRQ_RXMPDI (1 << 4) #define IRQ_LDI (1 << 3) #define IRQ_EDI (1 << 2) -#define IRQ_SPIBEI (1 << 1) +#define IRQ_SPIBEI (1 << 1) /* KSZ8851SNL */ #define IRQ_DEDI (1 << 0) #define KS_RXFCTR 0x9C @@ -288,13 +300,6 @@ #define P1SR_PNTR_10BT_HDX (1 << 0) /* TX Frame control */ - #define TXFR_TXIC (1 << 15) #define TXFR_TXFID_MASK (0x3f << 0) #define TXFR_TXFID_SHIFT (0) - -/* SPI frame opcodes */ -#define KS_SPIOP_RD (0x00) -#define KS_SPIOP_WR (0x40) -#define KS_SPIOP_RXFIFO (0x80) -#define KS_SPIOP_TXFIFO (0xC0) diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c index 35f8c9ef204d..c946841c0a06 100644 --- a/drivers/net/ethernet/micrel/ks8851_mll.c +++ b/drivers/net/ethernet/micrel/ks8851_mll.c @@ -40,6 +40,8 @@ #include #include +#include "ks8851.h" + #define DRV_NAME "ks8851_mll" static u8 KS_DEFAULT_MAC_ADDRESS[] = { 0x00, 0x10, 0xA1, 0x86, 0x95, 0x11 }; @@ -48,319 +50,10 @@ static u8 KS_DEFAULT_MAC_ADDRESS[] = { 0x00, 0x10, 0xA1, 0x86, 0x95, 0x11 }; #define TX_BUF_SIZE 2000 #define RX_BUF_SIZE 2000 -#define KS_CCR 0x08 -#define CCR_EEPROM (1 << 9) -#define CCR_SPI (1 << 8) -#define CCR_8BIT (1 << 7) -#define CCR_16BIT (1 << 6) -#define CCR_32BIT (1 << 5) -#define CCR_SHARED (1 << 4) -#define CCR_32PIN (1 << 0) - -/* MAC address registers */ -#define KS_MARL 0x10 -#define KS_MARM 0x12 -#define KS_MARH 0x14 - -#define KS_OBCR 0x20 -#define OBCR_ODS_16MA (1 << 6) - -#define KS_EEPCR 0x22 -#define EEPCR_EESA (1 << 4) -#define EEPCR_EESB (1 << 3) -#define EEPCR_EEDO (1 << 2) -#define EEPCR_EESCK (1 << 1) -#define EEPCR_EECS (1 << 0) - -#define KS_MBIR 0x24 -#define MBIR_TXMBF (1 << 12) -#define MBIR_TXMBFA (1 << 11) -#define MBIR_RXMBF (1 << 4) -#define MBIR_RXMBFA (1 << 3) - -#define KS_GRR 0x26 -#define GRR_QMU (1 << 1) -#define GRR_GSR (1 << 0) - -#define KS_WFCR 0x2A -#define WFCR_MPRXE (1 << 7) -#define WFCR_WF3E (1 << 3) -#define WFCR_WF2E (1 << 2) -#define WFCR_WF1E (1 << 1) -#define WFCR_WF0E (1 << 0) - -#define KS_WF0CRC0 0x30 -#define KS_WF0CRC1 0x32 -#define KS_WF0BM0 0x34 -#define KS_WF0BM1 0x36 -#define KS_WF0BM2 0x38 -#define KS_WF0BM3 0x3A - -#define KS_WF1CRC0 0x40 -#define KS_WF1CRC1 0x42 -#define KS_WF1BM0 0x44 -#define KS_WF1BM1 0x46 -#define KS_WF1BM2 0x48 -#define KS_WF1BM3 0x4A - -#define KS_WF2CRC0 0x50 -#define KS_WF2CRC1 0x52 -#define KS_WF2BM0 0x54 -#define KS_WF2BM1 0x56 -#define KS_WF2BM2 0x58 -#define KS_WF2BM3 0x5A - -#define KS_WF3CRC0 0x60 -#define KS_WF3CRC1 0x62 -#define KS_WF3BM0 0x64 -#define KS_WF3BM1 0x66 -#define KS_WF3BM2 0x68 -#define KS_WF3BM3 0x6A - -#define KS_TXCR 0x70 -#define TXCR_TCGICMP (1 << 8) -#define TXCR_TCGUDP (1 << 7) -#define TXCR_TCGTCP (1 << 6) -#define TXCR_TCGIP (1 << 5) -#define TXCR_FTXQ (1 << 4) -#define TXCR_TXFCE (1 << 3) -#define TXCR_TXPE (1 << 2) -#define TXCR_TXCRC (1 << 1) -#define TXCR_TXE (1 << 0) - -#define KS_TXSR 0x72 -#define TXSR_TXLC (1 << 13) -#define TXSR_TXMC (1 << 12) -#define TXSR_TXFID_MASK (0x3f << 0) -#define TXSR_TXFID_SHIFT (0) -#define TXSR_TXFID_GET(_v) (((_v) >> 0) & 0x3f) - - -#define KS_RXCR1 0x74 -#define RXCR1_FRXQ (1 << 15) -#define RXCR1_RXUDPFCC (1 << 14) -#define RXCR1_RXTCPFCC (1 << 13) -#define RXCR1_RXIPFCC (1 << 12) -#define RXCR1_RXPAFMA (1 << 11) -#define RXCR1_RXFCE (1 << 10) -#define RXCR1_RXEFE (1 << 9) -#define RXCR1_RXMAFMA (1 << 8) -#define RXCR1_RXBE (1 << 7) -#define RXCR1_RXME (1 << 6) -#define RXCR1_RXUE (1 << 5) -#define RXCR1_RXAE (1 << 4) -#define RXCR1_RXINVF (1 << 1) -#define RXCR1_RXE (1 << 0) #define RXCR1_FILTER_MASK (RXCR1_RXINVF | RXCR1_RXAE | \ RXCR1_RXMAFMA | RXCR1_RXPAFMA) - -#define KS_RXCR2 0x76 -#define RXCR2_SRDBL_MASK (0x7 << 5) -#define RXCR2_SRDBL_SHIFT (5) -#define RXCR2_SRDBL_4B (0x0 << 5) -#define RXCR2_SRDBL_8B (0x1 << 5) -#define RXCR2_SRDBL_16B (0x2 << 5) -#define RXCR2_SRDBL_32B (0x3 << 5) -/* #define RXCR2_SRDBL_FRAME (0x4 << 5) */ -#define RXCR2_IUFFP (1 << 4) -#define RXCR2_RXIUFCEZ (1 << 3) -#define RXCR2_UDPLFE (1 << 2) -#define RXCR2_RXICMPFCC (1 << 1) -#define RXCR2_RXSAF (1 << 0) - -#define KS_TXMIR 0x78 - -#define KS_RXFHSR 0x7C -#define RXFSHR_RXFV (1 << 15) -#define RXFSHR_RXICMPFCS (1 << 13) -#define RXFSHR_RXIPFCS (1 << 12) -#define RXFSHR_RXTCPFCS (1 << 11) -#define RXFSHR_RXUDPFCS (1 << 10) -#define RXFSHR_RXBF (1 << 7) -#define RXFSHR_RXMF (1 << 6) -#define RXFSHR_RXUF (1 << 5) -#define RXFSHR_RXMR (1 << 4) -#define RXFSHR_RXFT (1 << 3) -#define RXFSHR_RXFTL (1 << 2) -#define RXFSHR_RXRF (1 << 1) -#define RXFSHR_RXCE (1 << 0) -#define RXFSHR_ERR (RXFSHR_RXCE | RXFSHR_RXRF |\ - RXFSHR_RXFTL | RXFSHR_RXMR |\ - RXFSHR_RXICMPFCS | RXFSHR_RXIPFCS |\ - RXFSHR_RXTCPFCS) -#define KS_RXFHBCR 0x7E -#define RXFHBCR_CNT_MASK 0x0FFF - -#define KS_TXQCR 0x80 -#define TXQCR_AETFE (1 << 2) -#define TXQCR_TXQMAM (1 << 1) -#define TXQCR_METFE (1 << 0) - -#define KS_RXQCR 0x82 -#define RXQCR_RXDTTS (1 << 12) -#define RXQCR_RXDBCTS (1 << 11) -#define RXQCR_RXFCTS (1 << 10) -#define RXQCR_RXIPHTOE (1 << 9) -#define RXQCR_RXDTTE (1 << 7) -#define RXQCR_RXDBCTE (1 << 6) -#define RXQCR_RXFCTE (1 << 5) -#define RXQCR_ADRFE (1 << 4) -#define RXQCR_SDA (1 << 3) -#define RXQCR_RRXEF (1 << 0) #define RXQCR_CMD_CNTL (RXQCR_RXFCTE|RXQCR_ADRFE) -#define KS_TXFDPR 0x84 -#define TXFDPR_TXFPAI (1 << 14) -#define TXFDPR_TXFP_MASK (0x7ff << 0) -#define TXFDPR_TXFP_SHIFT (0) - -#define KS_RXFDPR 0x86 -#define RXFDPR_RXFPAI (1 << 14) - -#define KS_RXDTTR 0x8C -#define KS_RXDBCTR 0x8E - -#define KS_IER 0x90 -#define KS_ISR 0x92 -#define IRQ_LCI (1 << 15) -#define IRQ_TXI (1 << 14) -#define IRQ_RXI (1 << 13) -#define IRQ_RXOI (1 << 11) -#define IRQ_TXPSI (1 << 9) -#define IRQ_RXPSI (1 << 8) -#define IRQ_TXSAI (1 << 6) -#define IRQ_RXWFDI (1 << 5) -#define IRQ_RXMPDI (1 << 4) -#define IRQ_LDI (1 << 3) -#define IRQ_EDI (1 << 2) -#define IRQ_SPIBEI (1 << 1) -#define IRQ_DEDI (1 << 0) - -#define KS_RXFCTR 0x9C -#define RXFCTR_THRESHOLD_MASK 0x00FF - -#define KS_RXFC 0x9D -#define RXFCTR_RXFC_MASK (0xff << 8) -#define RXFCTR_RXFC_SHIFT (8) -#define RXFCTR_RXFC_GET(_v) (((_v) >> 8) & 0xff) -#define RXFCTR_RXFCT_MASK (0xff << 0) -#define RXFCTR_RXFCT_SHIFT (0) - -#define KS_TXNTFSR 0x9E - -#define KS_MAHTR0 0xA0 -#define KS_MAHTR1 0xA2 -#define KS_MAHTR2 0xA4 -#define KS_MAHTR3 0xA6 - -#define KS_FCLWR 0xB0 -#define KS_FCHWR 0xB2 -#define KS_FCOWR 0xB4 - -#define KS_CIDER 0xC0 -#define CIDER_ID 0x8870 -#define CIDER_REV_MASK (0x7 << 1) -#define CIDER_REV_SHIFT (1) -#define CIDER_REV_GET(_v) (((_v) >> 1) & 0x7) - -#define KS_CGCR 0xC6 -#define KS_IACR 0xC8 -#define IACR_RDEN (1 << 12) -#define IACR_TSEL_MASK (0x3 << 10) -#define IACR_TSEL_SHIFT (10) -#define IACR_TSEL_MIB (0x3 << 10) -#define IACR_ADDR_MASK (0x1f << 0) -#define IACR_ADDR_SHIFT (0) - -#define KS_IADLR 0xD0 -#define KS_IAHDR 0xD2 - -#define KS_PMECR 0xD4 -#define PMECR_PME_DELAY (1 << 14) -#define PMECR_PME_POL (1 << 12) -#define PMECR_WOL_WAKEUP (1 << 11) -#define PMECR_WOL_MAGICPKT (1 << 10) -#define PMECR_WOL_LINKUP (1 << 9) -#define PMECR_WOL_ENERGY (1 << 8) -#define PMECR_AUTO_WAKE_EN (1 << 7) -#define PMECR_WAKEUP_NORMAL (1 << 6) -#define PMECR_WKEVT_MASK (0xf << 2) -#define PMECR_WKEVT_SHIFT (2) -#define PMECR_WKEVT_GET(_v) (((_v) >> 2) & 0xf) -#define PMECR_WKEVT_ENERGY (0x1 << 2) -#define PMECR_WKEVT_LINK (0x2 << 2) -#define PMECR_WKEVT_MAGICPKT (0x4 << 2) -#define PMECR_WKEVT_FRAME (0x8 << 2) -#define PMECR_PM_MASK (0x3 << 0) -#define PMECR_PM_SHIFT (0) -#define PMECR_PM_NORMAL (0x0 << 0) -#define PMECR_PM_ENERGY (0x1 << 0) -#define PMECR_PM_SOFTDOWN (0x2 << 0) -#define PMECR_PM_POWERSAVE (0x3 << 0) - -/* Standard MII PHY data */ -#define KS_P1MBCR 0xE4 -#define P1MBCR_FORCE_FDX (1 << 8) - -#define KS_P1MBSR 0xE6 -#define P1MBSR_AN_COMPLETE (1 << 5) -#define P1MBSR_AN_CAPABLE (1 << 3) -#define P1MBSR_LINK_UP (1 << 2) - -#define KS_PHY1ILR 0xE8 -#define KS_PHY1IHR 0xEA -#define KS_P1ANAR 0xEC -#define KS_P1ANLPR 0xEE - -#define KS_P1SCLMD 0xF4 -#define P1SCLMD_LEDOFF (1 << 15) -#define P1SCLMD_TXIDS (1 << 14) -#define P1SCLMD_RESTARTAN (1 << 13) -#define P1SCLMD_DISAUTOMDIX (1 << 10) -#define P1SCLMD_FORCEMDIX (1 << 9) -#define P1SCLMD_AUTONEGEN (1 << 7) -#define P1SCLMD_FORCE100 (1 << 6) -#define P1SCLMD_FORCEFDX (1 << 5) -#define P1SCLMD_ADV_FLOW (1 << 4) -#define P1SCLMD_ADV_100BT_FDX (1 << 3) -#define P1SCLMD_ADV_100BT_HDX (1 << 2) -#define P1SCLMD_ADV_10BT_FDX (1 << 1) -#define P1SCLMD_ADV_10BT_HDX (1 << 0) - -#define KS_P1CR 0xF6 -#define P1CR_HP_MDIX (1 << 15) -#define P1CR_REV_POL (1 << 13) -#define P1CR_OP_100M (1 << 10) -#define P1CR_OP_FDX (1 << 9) -#define P1CR_OP_MDI (1 << 7) -#define P1CR_AN_DONE (1 << 6) -#define P1CR_LINK_GOOD (1 << 5) -#define P1CR_PNTR_FLOW (1 << 4) -#define P1CR_PNTR_100BT_FDX (1 << 3) -#define P1CR_PNTR_100BT_HDX (1 << 2) -#define P1CR_PNTR_10BT_FDX (1 << 1) -#define P1CR_PNTR_10BT_HDX (1 << 0) - -/* TX Frame control */ - -#define TXFR_TXIC (1 << 15) -#define TXFR_TXFID_MASK (0x3f << 0) -#define TXFR_TXFID_SHIFT (0) - -#define KS_P1SR 0xF8 -#define P1SR_HP_MDIX (1 << 15) -#define P1SR_REV_POL (1 << 13) -#define P1SR_OP_100M (1 << 10) -#define P1SR_OP_FDX (1 << 9) -#define P1SR_OP_MDI (1 << 7) -#define P1SR_AN_DONE (1 << 6) -#define P1SR_LINK_GOOD (1 << 5) -#define P1SR_PNTR_FLOW (1 << 4) -#define P1SR_PNTR_100BT_FDX (1 << 3) -#define P1SR_PNTR_100BT_HDX (1 << 2) -#define P1SR_PNTR_10BT_FDX (1 << 1) -#define P1SR_PNTR_10BT_HDX (1 << 0) - #define ENUM_BUS_NONE 0 #define ENUM_BUS_8BIT 1 #define ENUM_BUS_16BIT 2 @@ -1475,7 +1168,7 @@ static void ks_setup(struct ks_net *ks) ks_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI); /* Setup Receive Frame Threshold - 1 frame (RXFCTFC) */ - ks_wrreg16(ks, KS_RXFCTR, 1 & RXFCTR_THRESHOLD_MASK); + ks_wrreg16(ks, KS_RXFCTR, 1 & RXFCTR_RXFCT_MASK); /* Setup RxQ Command Control (RXQCR) */ ks->rc_rxqcr = RXQCR_CMD_CNTL; @@ -1488,7 +1181,7 @@ static void ks_setup(struct ks_net *ks) */ w = ks_rdreg16(ks, KS_P1MBCR); - w &= ~P1MBCR_FORCE_FDX; + w &= ~BMCR_FULLDPLX; ks_wrreg16(ks, KS_P1MBCR, w); w = TXCR_TXFCE | TXCR_TXPE | TXCR_TXCRC | TXCR_TCGIP; @@ -1629,7 +1322,7 @@ static int ks8851_probe(struct platform_device *pdev) ks_setup_int(ks); data = ks_rdreg16(ks, KS_OBCR); - ks_wrreg16(ks, KS_OBCR, data | OBCR_ODS_16MA); + ks_wrreg16(ks, KS_OBCR, data | OBCR_ODS_16mA); /* overwriting the default MAC address */ if (pdev->dev.of_node) { -- cgit From 29ece8b4354f8c5eaee798a3d8a1b356efee426f Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Mon, 18 Mar 2019 22:44:41 +0800 Subject: block: add BLK_MQ_POLL_CLASSIC for hybrid poll and return EINVAL for unexpected value For q->poll_nsec == -1, means doing classic poll, not hybrid poll. We introduce a new flag BLK_MQ_POLL_CLASSIC to replace -1, which may make code much easier to read. Additionally, since val is an int obtained with kstrtoint(), val can be a negative value other than -1, so return -EINVAL for that case. Thanks to Damien Le Moal for some good suggestion. Reviewed-by: Damien Le Moal Signed-off-by: Yufen Yu Signed-off-by: Jens Axboe --- block/blk-mq.c | 4 ++-- block/blk-sysfs.c | 12 +++++++----- include/linux/blkdev.h | 3 +++ 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index ea01c23b58a3..76a3f78c566a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2856,7 +2856,7 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, /* * Default to classic polling */ - q->poll_nsec = -1; + q->poll_nsec = BLK_MQ_POLL_CLASSIC; blk_mq_init_cpu_queues(q, set->nr_hw_queues); blk_mq_add_queue_tag_set(set, q); @@ -3391,7 +3391,7 @@ static bool blk_mq_poll_hybrid(struct request_queue *q, { struct request *rq; - if (q->poll_nsec == -1) + if (q->poll_nsec == BLK_MQ_POLL_CLASSIC) return false; if (!blk_qc_t_is_internal(cookie)) diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 59685918167e..422327089e0f 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -360,8 +360,8 @@ static ssize_t queue_poll_delay_show(struct request_queue *q, char *page) { int val; - if (q->poll_nsec == -1) - val = -1; + if (q->poll_nsec == BLK_MQ_POLL_CLASSIC) + val = BLK_MQ_POLL_CLASSIC; else val = q->poll_nsec / 1000; @@ -380,10 +380,12 @@ static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page, if (err < 0) return err; - if (val == -1) - q->poll_nsec = -1; - else + if (val == BLK_MQ_POLL_CLASSIC) + q->poll_nsec = BLK_MQ_POLL_CLASSIC; + else if (val >= 0) q->poll_nsec = val * 1000; + else + return -EINVAL; return count; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0de92b29f589..5c58a3b2bf00 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -50,6 +50,9 @@ struct blk_stat_callback; /* Must be consistent with blk_mq_poll_stats_bkt() */ #define BLK_MQ_POLL_STATS_BKTS 16 +/* Doing classic polling */ +#define BLK_MQ_POLL_CLASSIC -1 + /* * Maximum number of blkcg policies allowed to be registered concurrently. * Defined here to simplify include dependency. -- cgit From e6c987120e24cb913cb7bd4e675129a30fa49e0d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 20 Mar 2019 13:14:37 -0700 Subject: block: Unexport blk_mq_add_to_requeue_list() This function is not used outside the block layer core. Hence unexport it. Cc: Christoph Hellwig Cc: Ming Lei Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-mq.c | 1 - block/blk-mq.h | 2 ++ include/linux/blk-mq.h | 2 -- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 76a3f78c566a..70b210a308c4 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -782,7 +782,6 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, if (kick_requeue_list) blk_mq_kick_requeue_list(q); } -EXPORT_SYMBOL(blk_mq_add_to_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q) { diff --git a/block/blk-mq.h b/block/blk-mq.h index c11353a3749d..0ed8e5a8729f 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -41,6 +41,8 @@ void blk_mq_free_queue(struct request_queue *q); int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr); void blk_mq_wake_waiters(struct request_queue *q); bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *, bool); +void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, + bool kick_requeue_list); void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list); bool blk_mq_get_driver_tag(struct request *rq); struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx, diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 35359697318b..cb2aa7ecafff 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -299,8 +299,6 @@ void blk_mq_end_request(struct request *rq, blk_status_t error); void __blk_mq_end_request(struct request *rq, blk_status_t error); void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list); -void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, - bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); bool blk_mq_complete_request(struct request *rq); -- cgit From 373e915cd8e84544609eced57a44fbc084f8d60f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 20 Mar 2019 13:15:01 -0700 Subject: blk-iolatency: #include "blk.h" This patch avoids that the following warning is reported when building with W=1: block/blk-iolatency.c:734:5: warning: no previous prototype for 'blk_iolatency_init' [-Wmissing-prototypes] Cc: Josef Bacik Fixes: d70675121546 ("block: introduce blk-iolatency io controller") # v4.19 Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-iolatency.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 2620baa1f699..507212d75ee2 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -75,6 +75,7 @@ #include #include "blk-rq-qos.h" #include "blk-stat.h" +#include "blk.h" #define DEFAULT_SCALE_COOKIE 1000000U -- cgit From 537d71b3f774c3e825540ab626bad6c0ed2b5ff7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 20 Mar 2019 13:18:45 -0700 Subject: blkcg: Fix kernel-doc warnings Avoid that the following warnings are reported when building with W=1: block/blk-cgroup.c:1755: warning: Function parameter or member 'q' not described in 'blkcg_schedule_throttle' block/blk-cgroup.c:1755: warning: Function parameter or member 'use_memdelay' not described in 'blkcg_schedule_throttle' block/blk-cgroup.c:1779: warning: Function parameter or member 'blkg' not described in 'blkcg_add_delay' block/blk-cgroup.c:1779: warning: Function parameter or member 'now' not described in 'blkcg_add_delay' block/blk-cgroup.c:1779: warning: Function parameter or member 'delta' not described in 'blkcg_add_delay' Signed-off-by: Bart Van Assche Signed-off-by: Jens Axboe --- block/blk-cgroup.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 77f37ef8ef06..617a2b3f7582 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -1736,8 +1736,8 @@ out: /** * blkcg_schedule_throttle - this task needs to check for throttling - * @q - the request queue IO was submitted on - * @use_memdelay - do we charge this to memory delay for PSI + * @q: the request queue IO was submitted on + * @use_memdelay: do we charge this to memory delay for PSI * * This is called by the IO controller when we know there's delay accumulated * for the blkg for this task. We do not pass the blkg because there are places @@ -1769,8 +1769,9 @@ void blkcg_schedule_throttle(struct request_queue *q, bool use_memdelay) /** * blkcg_add_delay - add delay to this blkg - * @now - the current time in nanoseconds - * @delta - how many nanoseconds of delay to add + * @blkg: blkg of interest + * @now: the current time in nanoseconds + * @delta: how many nanoseconds of delay to add * * Charge @delta to the blkg's current delay accumulation. This is used to * throttle tasks if an IO controller thinks we need more throttling. -- cgit From 64447506f152cf0f88a0fc23140ca1c5f7ff34a8 Mon Sep 17 00:00:00 2001 From: Ioana Ciocoi Radulescu Date: Wed, 20 Mar 2019 14:11:04 +0000 Subject: dpaa2-eth: Fix possible access beyond end of array Make sure we don't try to enqueue XDP_REDIRECT frames to an inexistent FQ. While it is guaranteed not to have more than one queue per core, having fewer queues than CPUs on an interface is a valid configuration. Fixes: d678be1dc1ec ("dpaa2-eth: add XDP_REDIRECT support") Reported-by: Jesper Dangaard Brouer Signed-off-by: Ioana Radulescu Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 2ba49e959c3f..1a68052abb94 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -1817,7 +1817,7 @@ static int dpaa2_eth_xdp_xmit_frame(struct net_device *net_dev, dpaa2_fd_set_format(&fd, dpaa2_fd_single); dpaa2_fd_set_ctrl(&fd, FD_CTRL_PTA); - fq = &priv->fq[smp_processor_id()]; + fq = &priv->fq[smp_processor_id() % dpaa2_eth_queue_count(priv)]; for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) { err = priv->enqueue(priv, fq, &fd, 0); if (err != -EBUSY) -- cgit From 7205981e045e752ccf96cf6ddd703a98c59d4339 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Wed, 20 Mar 2019 13:41:50 -0500 Subject: scsi: ibmvscsi: Protect ibmvscsi_head from concurrent modificaiton For each ibmvscsi host created during a probe or destroyed during a remove we either add or remove that host to/from the global ibmvscsi_head list. This runs the risk of concurrent modification. This patch adds a simple spinlock around the list modification calls to prevent concurrent updates as is done similarly in the ibmvfc driver and ipr driver. Fixes: 32d6e4b6e4ea ("scsi: ibmvscsi: add vscsi hosts to global list_head") Cc: # v4.10+ Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvscsi.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 1135e74646e2..2b22969f3f63 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -96,6 +96,7 @@ static int client_reserve = 1; static char partition_name[96] = "UNKNOWN"; static unsigned int partition_number = -1; static LIST_HEAD(ibmvscsi_head); +static DEFINE_SPINLOCK(ibmvscsi_driver_lock); static struct scsi_transport_template *ibmvscsi_transport_template; @@ -2270,7 +2271,9 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id) } dev_set_drvdata(&vdev->dev, hostdata); + spin_lock(&ibmvscsi_driver_lock); list_add_tail(&hostdata->host_list, &ibmvscsi_head); + spin_unlock(&ibmvscsi_driver_lock); return 0; add_srp_port_failed: @@ -2292,7 +2295,9 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id) static int ibmvscsi_remove(struct vio_dev *vdev) { struct ibmvscsi_host_data *hostdata = dev_get_drvdata(&vdev->dev); + spin_lock(&ibmvscsi_driver_lock); list_del(&hostdata->host_list); + spin_unlock(&ibmvscsi_driver_lock); unmap_persist_bufs(hostdata); release_event_pool(&hostdata->pool, hostdata); ibmvscsi_release_crq_queue(&hostdata->queue, hostdata, -- cgit From 7f5203c13ba8a7b7f9f6ecfe5a4d5567188d7835 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Wed, 20 Mar 2019 13:41:51 -0500 Subject: scsi: ibmvscsi: Fix empty event pool access during host removal The event pool used for queueing commands is destroyed fairly early in the ibmvscsi_remove() code path. Since, this happens prior to the call so scsi_remove_host() it is possible for further calls to queuecommand to be processed which manifest as a panic due to a NULL pointer dereference as seen here: PANIC: "Unable to handle kernel paging request for data at address 0x00000000" Context process backtrace: DSISR: 0000000042000000 ????Syscall Result: 0000000000000000 4 [c000000002cb3820] memcpy_power7 at c000000000064204 [Link Register] [c000000002cb3820] ibmvscsi_send_srp_event at d000000003ed14a4 5 [c000000002cb3920] ibmvscsi_send_srp_event at d000000003ed14a4 [ibmvscsi] ?(unreliable) 6 [c000000002cb39c0] ibmvscsi_queuecommand at d000000003ed2388 [ibmvscsi] 7 [c000000002cb3a70] scsi_dispatch_cmd at d00000000395c2d8 [scsi_mod] 8 [c000000002cb3af0] scsi_request_fn at d00000000395ef88 [scsi_mod] 9 [c000000002cb3be0] __blk_run_queue at c000000000429860 10 [c000000002cb3c10] blk_delay_work at c00000000042a0ec 11 [c000000002cb3c40] process_one_work at c0000000000dac30 12 [c000000002cb3cd0] worker_thread at c0000000000db110 13 [c000000002cb3d80] kthread at c0000000000e3378 14 [c000000002cb3e30] ret_from_kernel_thread at c00000000000982c The kernel buffer log is overfilled with this log: [11261.952732] ibmvscsi: found no event struct in pool! This patch reorders the operations during host teardown. Start by calling the SRP transport and Scsi_Host remove functions to flush any outstanding work and set the host offline. LLDD teardown follows including destruction of the event pool, freeing the Command Response Queue (CRQ), and unmapping any persistent buffers. The event pool destruction is protected by the scsi_host lock, and the pool is purged prior of any requests for which we never received a response. Finally, move the removal of the scsi host from our global list to the end so that the host is easily locatable for debugging purposes during teardown. Cc: # v2.6.12+ Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvscsi.c | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvscsi.c b/drivers/scsi/ibmvscsi/ibmvscsi.c index 2b22969f3f63..8cec5230fe31 100644 --- a/drivers/scsi/ibmvscsi/ibmvscsi.c +++ b/drivers/scsi/ibmvscsi/ibmvscsi.c @@ -2295,17 +2295,27 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id) static int ibmvscsi_remove(struct vio_dev *vdev) { struct ibmvscsi_host_data *hostdata = dev_get_drvdata(&vdev->dev); - spin_lock(&ibmvscsi_driver_lock); - list_del(&hostdata->host_list); - spin_unlock(&ibmvscsi_driver_lock); - unmap_persist_bufs(hostdata); + unsigned long flags; + + srp_remove_host(hostdata->host); + scsi_remove_host(hostdata->host); + + purge_requests(hostdata, DID_ERROR); + + spin_lock_irqsave(hostdata->host->host_lock, flags); release_event_pool(&hostdata->pool, hostdata); + spin_unlock_irqrestore(hostdata->host->host_lock, flags); + ibmvscsi_release_crq_queue(&hostdata->queue, hostdata, max_events); kthread_stop(hostdata->work_thread); - srp_remove_host(hostdata->host); - scsi_remove_host(hostdata->host); + unmap_persist_bufs(hostdata); + + spin_lock(&ibmvscsi_driver_lock); + list_del(&hostdata->host_list); + spin_unlock(&ibmvscsi_driver_lock); + scsi_host_put(hostdata->host); return 0; -- cgit From 6a3b45ada960ac475ec2b4103d43e57943b2b8d3 Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Tue, 19 Mar 2019 14:05:11 +0100 Subject: drm/exynos/mixer: fix MIXER shadow registry synchronisation code MIXER on Exynos5 SoCs uses different synchronisation method than Exynos4 to update internal state (shadow registers). Apparently the driver implements it incorrectly. The rule should be as follows: - do not request updating registers until previous request was finished, ie. MXR_CFG_LAYER_UPDATE_COUNT must be 0. - before setting registers synchronisation on VSYNC should be turned off, ie. MXR_STATUS_SYNC_ENABLE should be reset, - after finishing MXR_STATUS_SYNC_ENABLE should be set again. The patch hopefully implements it correctly. Below sample kernel log from page fault caused by the bug: [ 25.670038] exynos-sysmmu 14650000.sysmmu: 14450000.mixer: PAGE FAULT occurred at 0x2247b800 [ 25.677888] ------------[ cut here ]------------ [ 25.682164] kernel BUG at ../drivers/iommu/exynos-iommu.c:450! [ 25.687971] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP ARM [ 25.693778] Modules linked in: [ 25.696816] CPU: 5 PID: 1553 Comm: fb-release_test Not tainted 5.0.0-rc7-01157-g5f86b1566bdd #136 [ 25.705646] Hardware name: SAMSUNG EXYNOS (Flattened Device Tree) [ 25.711710] PC is at exynos_sysmmu_irq+0x1c0/0x264 [ 25.716470] LR is at lock_is_held_type+0x44/0x64 v2: added missing MXR_CFG_LAYER_UPDATE bit setting in mixer_enable_sync Reported-by: Marian Mihailescu Signed-off-by: Andrzej Hajda Signed-off-by: Inki Dae --- drivers/gpu/drm/exynos/exynos_mixer.c | 110 ++++++++++++++++++++-------------- 1 file changed, 66 insertions(+), 44 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_mixer.c b/drivers/gpu/drm/exynos/exynos_mixer.c index 0573eab0e190..f35e4ab55b27 100644 --- a/drivers/gpu/drm/exynos/exynos_mixer.c +++ b/drivers/gpu/drm/exynos/exynos_mixer.c @@ -20,6 +20,7 @@ #include "regs-vp.h" #include +#include #include #include #include @@ -352,15 +353,62 @@ static void mixer_cfg_vp_blend(struct mixer_context *ctx, unsigned int alpha) mixer_reg_write(ctx, MXR_VIDEO_CFG, val); } -static void mixer_vsync_set_update(struct mixer_context *ctx, bool enable) +static bool mixer_is_synced(struct mixer_context *ctx) { - /* block update on vsync */ - mixer_reg_writemask(ctx, MXR_STATUS, enable ? - MXR_STATUS_SYNC_ENABLE : 0, MXR_STATUS_SYNC_ENABLE); + u32 base, shadow; + if (ctx->mxr_ver == MXR_VER_16_0_33_0 || + ctx->mxr_ver == MXR_VER_128_0_0_184) + return !(mixer_reg_read(ctx, MXR_CFG) & + MXR_CFG_LAYER_UPDATE_COUNT_MASK); + + if (test_bit(MXR_BIT_VP_ENABLED, &ctx->flags) && + vp_reg_read(ctx, VP_SHADOW_UPDATE)) + return false; + + base = mixer_reg_read(ctx, MXR_CFG); + shadow = mixer_reg_read(ctx, MXR_CFG_S); + if (base != shadow) + return false; + + base = mixer_reg_read(ctx, MXR_GRAPHIC_BASE(0)); + shadow = mixer_reg_read(ctx, MXR_GRAPHIC_BASE_S(0)); + if (base != shadow) + return false; + + base = mixer_reg_read(ctx, MXR_GRAPHIC_BASE(1)); + shadow = mixer_reg_read(ctx, MXR_GRAPHIC_BASE_S(1)); + if (base != shadow) + return false; + + return true; +} + +static int mixer_wait_for_sync(struct mixer_context *ctx) +{ + ktime_t timeout = ktime_add_us(ktime_get(), 100000); + + while (!mixer_is_synced(ctx)) { + usleep_range(1000, 2000); + if (ktime_compare(ktime_get(), timeout) > 0) + return -ETIMEDOUT; + } + return 0; +} + +static void mixer_disable_sync(struct mixer_context *ctx) +{ + mixer_reg_writemask(ctx, MXR_STATUS, 0, MXR_STATUS_SYNC_ENABLE); +} + +static void mixer_enable_sync(struct mixer_context *ctx) +{ + if (ctx->mxr_ver == MXR_VER_16_0_33_0 || + ctx->mxr_ver == MXR_VER_128_0_0_184) + mixer_reg_writemask(ctx, MXR_CFG, ~0, MXR_CFG_LAYER_UPDATE); + mixer_reg_writemask(ctx, MXR_STATUS, ~0, MXR_STATUS_SYNC_ENABLE); if (test_bit(MXR_BIT_VP_ENABLED, &ctx->flags)) - vp_reg_write(ctx, VP_SHADOW_UPDATE, enable ? - VP_SHADOW_UPDATE_ENABLE : 0); + vp_reg_write(ctx, VP_SHADOW_UPDATE, VP_SHADOW_UPDATE_ENABLE); } static void mixer_cfg_scan(struct mixer_context *ctx, int width, int height) @@ -498,7 +546,6 @@ static void vp_video_buffer(struct mixer_context *ctx, spin_lock_irqsave(&ctx->reg_slock, flags); - vp_reg_write(ctx, VP_SHADOW_UPDATE, 1); /* interlace or progressive scan mode */ val = (test_bit(MXR_BIT_INTERLACE, &ctx->flags) ? ~0 : 0); vp_reg_writemask(ctx, VP_MODE, val, VP_MODE_LINE_SKIP); @@ -553,11 +600,6 @@ static void vp_video_buffer(struct mixer_context *ctx, vp_regs_dump(ctx); } -static void mixer_layer_update(struct mixer_context *ctx) -{ - mixer_reg_writemask(ctx, MXR_CFG, ~0, MXR_CFG_LAYER_UPDATE); -} - static void mixer_graph_buffer(struct mixer_context *ctx, struct exynos_drm_plane *plane) { @@ -640,11 +682,6 @@ static void mixer_graph_buffer(struct mixer_context *ctx, mixer_cfg_layer(ctx, win, priority, true); mixer_cfg_gfx_blend(ctx, win, pixel_alpha, state->base.alpha); - /* layer update mandatory for mixer 16.0.33.0 */ - if (ctx->mxr_ver == MXR_VER_16_0_33_0 || - ctx->mxr_ver == MXR_VER_128_0_0_184) - mixer_layer_update(ctx); - spin_unlock_irqrestore(&ctx->reg_slock, flags); mixer_regs_dump(ctx); @@ -709,7 +746,7 @@ static void mixer_win_reset(struct mixer_context *ctx) static irqreturn_t mixer_irq_handler(int irq, void *arg) { struct mixer_context *ctx = arg; - u32 val, base, shadow; + u32 val; spin_lock(&ctx->reg_slock); @@ -723,26 +760,9 @@ static irqreturn_t mixer_irq_handler(int irq, void *arg) val &= ~MXR_INT_STATUS_VSYNC; /* interlace scan need to check shadow register */ - if (test_bit(MXR_BIT_INTERLACE, &ctx->flags)) { - if (test_bit(MXR_BIT_VP_ENABLED, &ctx->flags) && - vp_reg_read(ctx, VP_SHADOW_UPDATE)) - goto out; - - base = mixer_reg_read(ctx, MXR_CFG); - shadow = mixer_reg_read(ctx, MXR_CFG_S); - if (base != shadow) - goto out; - - base = mixer_reg_read(ctx, MXR_GRAPHIC_BASE(0)); - shadow = mixer_reg_read(ctx, MXR_GRAPHIC_BASE_S(0)); - if (base != shadow) - goto out; - - base = mixer_reg_read(ctx, MXR_GRAPHIC_BASE(1)); - shadow = mixer_reg_read(ctx, MXR_GRAPHIC_BASE_S(1)); - if (base != shadow) - goto out; - } + if (test_bit(MXR_BIT_INTERLACE, &ctx->flags) + && !mixer_is_synced(ctx)) + goto out; drm_crtc_handle_vblank(&ctx->crtc->base); } @@ -917,12 +937,14 @@ static void mixer_disable_vblank(struct exynos_drm_crtc *crtc) static void mixer_atomic_begin(struct exynos_drm_crtc *crtc) { - struct mixer_context *mixer_ctx = crtc->ctx; + struct mixer_context *ctx = crtc->ctx; - if (!test_bit(MXR_BIT_POWERED, &mixer_ctx->flags)) + if (!test_bit(MXR_BIT_POWERED, &ctx->flags)) return; - mixer_vsync_set_update(mixer_ctx, false); + if (mixer_wait_for_sync(ctx)) + dev_err(ctx->dev, "timeout waiting for VSYNC\n"); + mixer_disable_sync(ctx); } static void mixer_update_plane(struct exynos_drm_crtc *crtc, @@ -964,7 +986,7 @@ static void mixer_atomic_flush(struct exynos_drm_crtc *crtc) if (!test_bit(MXR_BIT_POWERED, &mixer_ctx->flags)) return; - mixer_vsync_set_update(mixer_ctx, true); + mixer_enable_sync(mixer_ctx); exynos_crtc_handle_event(crtc); } @@ -979,7 +1001,7 @@ static void mixer_enable(struct exynos_drm_crtc *crtc) exynos_drm_pipe_clk_enable(crtc, true); - mixer_vsync_set_update(ctx, false); + mixer_disable_sync(ctx); mixer_reg_writemask(ctx, MXR_STATUS, ~0, MXR_STATUS_SOFT_RESET); @@ -992,7 +1014,7 @@ static void mixer_enable(struct exynos_drm_crtc *crtc) mixer_commit(ctx); - mixer_vsync_set_update(ctx, true); + mixer_enable_sync(ctx); set_bit(MXR_BIT_POWERED, &ctx->flags); } -- cgit From 688931a5ad4e55ba0c215248ba510cd67bc3afb4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 19 Mar 2019 13:02:36 +0900 Subject: kbuild: skip sub-make for in-tree build with GNU Make 4.x Commit 2b50f7ab6368 ("kbuild: add workaround for Debian make-kpkg") annoyed people who want to wrap the top Makefile with GNUmakefile to customize it for their use. On second thought, we do not need to run the sub-make for in-tree build with Make 4.x because the 'MAKEFLAGS += -rR' issue only happens on GNU Make 3.x. With this commit, people will get back their workflow, and the Debian make-kpkg will still work. Fixes: 2b50f7ab6368 ("kbuild: add workaround for Debian make-kpkg") Reported-by: Andreas Schwab Reported-by: David Howells Signed-off-by: Masahiro Yamada Tested-by: Andreas Schwab Tested-by: David Howells --- Makefile | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/Makefile b/Makefile index 8c6acfdfe660..6c0635f69982 100644 --- a/Makefile +++ b/Makefile @@ -31,26 +31,12 @@ _all: # descending is started. They are now explicitly listed as the # prepare rule. -# Ugly workaround for Debian make-kpkg: -# make-kpkg directly includes the top Makefile of Linux kernel. In such a case, -# skip sub-make to support debian_* targets in ruleset/kernel_version.mk, but -# displays warning to discourage such abusage. -ifneq ($(word 2, $(MAKEFILE_LIST)),) -$(warning Do not include top Makefile of Linux Kernel) -sub-make-done := 1 -MAKEFLAGS += -rR -endif - ifneq ($(sub-make-done),1) # Do not use make's built-in rules and variables # (this increases performance and avoids hard-to-debug behaviour) MAKEFLAGS += -rR -# 'MAKEFLAGS += -rR' does not become immediately effective for old -# GNU Make versions. Cancel implicit rules for this Makefile. -$(lastword $(MAKEFILE_LIST)): ; - # Avoid funny character set dependencies unexport LC_ALL LC_COLLATE=C @@ -153,6 +139,7 @@ $(if $(KBUILD_OUTPUT),, \ # 'sub-make' below. MAKEFLAGS += --include-dir=$(CURDIR) +need-sub-make := 1 else # Do not print "Entering directory ..." at all for in-tree build. @@ -160,6 +147,16 @@ MAKEFLAGS += --no-print-directory endif # ifneq ($(KBUILD_OUTPUT),) +ifneq ($(filter 3.%,$(MAKE_VERSION)),) +# 'MAKEFLAGS += -rR' does not immediately become effective for GNU Make 3.x +# We need to invoke sub-make to avoid implicit rules in the top Makefile. +need-sub-make := 1 +# Cancel implicit rules for this Makefile. +$(lastword $(MAKEFILE_LIST)): ; +endif + +ifeq ($(need-sub-make),1) + PHONY += $(MAKECMDGOALS) sub-make $(filter-out _all sub-make $(CURDIR)/Makefile, $(MAKECMDGOALS)) _all: sub-make @@ -171,8 +168,11 @@ sub-make: $(if $(KBUILD_OUTPUT),-C $(KBUILD_OUTPUT) KBUILD_SRC=$(CURDIR)) \ -f $(CURDIR)/Makefile $(filter-out _all sub-make,$(MAKECMDGOALS)) -else # sub-make-done +endif # need-sub-make +endif # sub-make-done + # We process the rest of the Makefile if this is the final invocation of make +ifeq ($(need-sub-make),) # Do not print "Entering directory ...", # but we want to display it when entering to the output directory @@ -1757,7 +1757,7 @@ existing-targets := $(wildcard $(sort $(targets))) endif # ifeq ($(config-targets),1) endif # ifeq ($(mixed-targets),1) -endif # sub-make-done +endif # need-sub-make PHONY += FORCE FORCE: -- cgit From cba368c1f01c27ed62fca7a853531845d263bb01 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 18 Mar 2019 10:37:13 -0700 Subject: bpf: Only print ref_obj_id for refcounted reg Naresh reported that test_align fails because of the mismatch at the verbose printout of the register states. The reason is due to the newly added ref_obj_id. ref_obj_id is only useful for refcounted reg. Thus, this patch fixes it by only printing ref_obj_id for refcounted reg. While at it, it also uses comma instead of space to separate between "id" and "ref_obj_id". Fixes: 1b986589680a ("bpf: Fix bpf_tcp_sock and bpf_sk_fullsock issue related to bpf_sk_release") Reported-by: Naresh Kamboju Signed-off-by: Martin KaFai Lau Acked-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 86f9cd5d1c4e..5aa810882583 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -352,6 +352,14 @@ static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) map_value_has_spin_lock(reg->map_ptr); } +static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type) +{ + return type == PTR_TO_SOCKET || + type == PTR_TO_SOCKET_OR_NULL || + type == PTR_TO_TCP_SOCK || + type == PTR_TO_TCP_SOCK_OR_NULL; +} + static bool arg_type_may_be_refcounted(enum bpf_arg_type type) { return type == ARG_PTR_TO_SOCK_COMMON; @@ -451,8 +459,9 @@ static void print_verifier_state(struct bpf_verifier_env *env, if (t == PTR_TO_STACK) verbose(env, ",call_%d", func(env, reg)->callsite); } else { - verbose(env, "(id=%d ref_obj_id=%d", reg->id, - reg->ref_obj_id); + verbose(env, "(id=%d", reg->id); + if (reg_type_may_be_refcounted_or_null(t)) + verbose(env, ",ref_obj_id=%d", reg->ref_obj_id); if (t != SCALAR_VALUE) verbose(env, ",off=%d", reg->off); if (type_is_pkt_pointer(t)) -- cgit From 3123be11683ed8c2f26f787df81966b538ca9f72 Mon Sep 17 00:00:00 2001 From: Nishad Kamdar Date: Mon, 11 Mar 2019 19:57:04 +0530 Subject: ARM: dts: imx6ull: Use the correct style for SPDX License Identifier This patch corrects the SPDX License Identifier style in imx6ull-pinfunc-snvs.h. Changes made by using a script provided by Joe Perches here: https://lkml.org/lkml/2019/2/7/46 and making some manual changes. Suggested-by: Joe Perches Signed-off-by: Nishad Kamdar Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6ull-pinfunc-snvs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/imx6ull-pinfunc-snvs.h b/arch/arm/boot/dts/imx6ull-pinfunc-snvs.h index f6fb6783c193..54cfe72295aa 100644 --- a/arch/arm/boot/dts/imx6ull-pinfunc-snvs.h +++ b/arch/arm/boot/dts/imx6ull-pinfunc-snvs.h @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2016 Freescale Semiconductor, Inc. * Copyright (C) 2017 NXP -- cgit From 5997da82145bb7c9a56d834894cb81f81f219344 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Wed, 20 Mar 2019 15:35:45 -0700 Subject: binder: fix BUG_ON found by selinux-testsuite The selinux-testsuite found an issue resulting in a BUG_ON() where a conditional relied on a size_t going negative when checking the validity of a buffer offset. Fixes: 7a67a39320df ("binder: add function to copy binder object from buffer") Reported-by: Paul Moore Tested-by: Paul Moore Signed-off-by: Todd Kjos Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 8685882da64c..4b9c7ca492e6 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2057,7 +2057,8 @@ static size_t binder_get_object(struct binder_proc *proc, size_t object_size = 0; read_size = min_t(size_t, sizeof(*object), buffer->data_size - offset); - if (read_size < sizeof(*hdr) || !IS_ALIGNED(offset, sizeof(u32))) + if (offset > buffer->data_size || read_size < sizeof(*hdr) || + !IS_ALIGNED(offset, sizeof(u32))) return 0; binder_alloc_copy_from_buffer(&proc->alloc, object, buffer, offset, read_size); -- cgit From 5cec2d2e5839f9c0fec319c523a911e0a7fd299f Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Fri, 1 Mar 2019 15:06:06 -0800 Subject: binder: fix race between munmap() and direct reclaim An munmap() on a binder device causes binder_vma_close() to be called which clears the alloc->vma pointer. If direct reclaim causes binder_alloc_free_page() to be called, there is a race where alloc->vma is read into a local vma pointer and then used later after the mm->mmap_sem is acquired. This can result in calling zap_page_range() with an invalid vma which manifests as a use-after-free in zap_page_range(). The fix is to check alloc->vma after acquiring the mmap_sem (which we were acquiring anyway) and skip zap_page_range() if it has changed to NULL. Signed-off-by: Todd Kjos Reviewed-by: Joel Fernandes (Google) Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/android/binder_alloc.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index 6389467670a0..195f120c4e8c 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -927,14 +927,13 @@ enum lru_status binder_alloc_free_page(struct list_head *item, index = page - alloc->pages; page_addr = (uintptr_t)alloc->buffer + index * PAGE_SIZE; + + mm = alloc->vma_vm_mm; + if (!mmget_not_zero(mm)) + goto err_mmget; + if (!down_write_trylock(&mm->mmap_sem)) + goto err_down_write_mmap_sem_failed; vma = binder_alloc_get_vma(alloc); - if (vma) { - if (!mmget_not_zero(alloc->vma_vm_mm)) - goto err_mmget; - mm = alloc->vma_vm_mm; - if (!down_read_trylock(&mm->mmap_sem)) - goto err_down_write_mmap_sem_failed; - } list_lru_isolate(lru, item); spin_unlock(lock); @@ -945,10 +944,9 @@ enum lru_status binder_alloc_free_page(struct list_head *item, zap_page_range(vma, page_addr, PAGE_SIZE); trace_binder_unmap_user_end(alloc, index); - - up_read(&mm->mmap_sem); - mmput(mm); } + up_write(&mm->mmap_sem); + mmput(mm); trace_binder_unmap_kernel_start(alloc, index); -- cgit From 667a8f73753908c4d0171e52b71774f9be5d6713 Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Fri, 15 Mar 2019 17:51:09 +0800 Subject: ALSA: hda/realtek: Enable headset MIC of Acer AIO with ALC286 Some Acer AIO desktops like Veriton Z6860G, Z4860G and Z4660G cannot record sound from headset MIC. This patch adds the ALC286_FIXUP_ACER_AIO_HEADSET_MIC quirk to fix this issue. Fixes: 9f8aefed9623 ("ALSA: hda/realtek: Fix mic issue on Acer AIO Veriton Z4660G") Fixes: b72f936f6b32 ("ALSA: hda/realtek: Fix mic issue on Acer AIO Veriton Z4860G/Z6860G") Signed-off-by: Jian-Hong Pan Reviewed-by: Kailang Yang Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 384719d5c44e..191830d4fa40 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5687,6 +5687,7 @@ enum { ALC225_FIXUP_DELL_WYSE_AIO_MIC_NO_PRESENCE, ALC225_FIXUP_WYSE_AUTO_MUTE, ALC225_FIXUP_WYSE_DISABLE_MIC_VREF, + ALC286_FIXUP_ACER_AIO_HEADSET_MIC, }; static const struct hda_fixup alc269_fixups[] = { @@ -6685,6 +6686,16 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC }, + [ALC286_FIXUP_ACER_AIO_HEADSET_MIC] = { + .type = HDA_FIXUP_VERBS, + .v.verbs = (const struct hda_verb[]) { + { 0x20, AC_VERB_SET_COEF_INDEX, 0x4f }, + { 0x20, AC_VERB_SET_PROC_COEF, 0x5029 }, + { } + }, + .chained = true, + .chain_id = ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -6701,9 +6712,9 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS), SND_PCI_QUIRK(0x1025, 0x102b, "Acer Aspire C24-860", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x106d, "Acer Cloudbook 14", ALC283_FIXUP_CHROME_BOOK), - SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE), - SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z), SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS), -- cgit From 7671ce0d92933762f469266daf43bd34d422d58c Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Wed, 20 Mar 2019 12:21:35 -0500 Subject: staging: rtl8188eu: Fix potential NULL pointer dereference of kcalloc hwxmits is allocated via kcalloc and not checked for failure before its dereference. The patch fixes this problem by returning error upstream in rtl8723bs, rtl8188eu. Signed-off-by: Aditya Pakki Acked-by: Mukesh Ojha Reviewed-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8188eu/core/rtw_xmit.c | 9 +++++++-- drivers/staging/rtl8188eu/include/rtw_xmit.h | 2 +- drivers/staging/rtl8723bs/core/rtw_xmit.c | 14 +++++++------- drivers/staging/rtl8723bs/include/rtw_xmit.h | 2 +- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/drivers/staging/rtl8188eu/core/rtw_xmit.c b/drivers/staging/rtl8188eu/core/rtw_xmit.c index 1723a47a96b4..952f2ab51347 100644 --- a/drivers/staging/rtl8188eu/core/rtw_xmit.c +++ b/drivers/staging/rtl8188eu/core/rtw_xmit.c @@ -174,7 +174,9 @@ s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter) pxmitpriv->free_xmit_extbuf_cnt = num_xmit_extbuf; - rtw_alloc_hwxmits(padapter); + res = rtw_alloc_hwxmits(padapter); + if (res == _FAIL) + goto exit; rtw_init_hwxmits(pxmitpriv->hwxmits, pxmitpriv->hwxmit_entry); for (i = 0; i < 4; i++) @@ -1503,7 +1505,7 @@ exit: return res; } -void rtw_alloc_hwxmits(struct adapter *padapter) +s32 rtw_alloc_hwxmits(struct adapter *padapter) { struct hw_xmit *hwxmits; struct xmit_priv *pxmitpriv = &padapter->xmitpriv; @@ -1512,6 +1514,8 @@ void rtw_alloc_hwxmits(struct adapter *padapter) pxmitpriv->hwxmits = kcalloc(pxmitpriv->hwxmit_entry, sizeof(struct hw_xmit), GFP_KERNEL); + if (!pxmitpriv->hwxmits) + return _FAIL; hwxmits = pxmitpriv->hwxmits; @@ -1519,6 +1523,7 @@ void rtw_alloc_hwxmits(struct adapter *padapter) hwxmits[1] .sta_queue = &pxmitpriv->vi_pending; hwxmits[2] .sta_queue = &pxmitpriv->be_pending; hwxmits[3] .sta_queue = &pxmitpriv->bk_pending; + return _SUCCESS; } void rtw_free_hwxmits(struct adapter *padapter) diff --git a/drivers/staging/rtl8188eu/include/rtw_xmit.h b/drivers/staging/rtl8188eu/include/rtw_xmit.h index 788f59c74ea1..ba7e15fbde72 100644 --- a/drivers/staging/rtl8188eu/include/rtw_xmit.h +++ b/drivers/staging/rtl8188eu/include/rtw_xmit.h @@ -336,7 +336,7 @@ s32 rtw_txframes_sta_ac_pending(struct adapter *padapter, void rtw_init_hwxmits(struct hw_xmit *phwxmit, int entry); s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter); void _rtw_free_xmit_priv(struct xmit_priv *pxmitpriv); -void rtw_alloc_hwxmits(struct adapter *padapter); +s32 rtw_alloc_hwxmits(struct adapter *padapter); void rtw_free_hwxmits(struct adapter *padapter); s32 rtw_xmit(struct adapter *padapter, struct sk_buff **pkt); diff --git a/drivers/staging/rtl8723bs/core/rtw_xmit.c b/drivers/staging/rtl8723bs/core/rtw_xmit.c index 094d61bcb469..b87f13a0b563 100644 --- a/drivers/staging/rtl8723bs/core/rtw_xmit.c +++ b/drivers/staging/rtl8723bs/core/rtw_xmit.c @@ -260,7 +260,9 @@ s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter) } } - rtw_alloc_hwxmits(padapter); + res = rtw_alloc_hwxmits(padapter); + if (res == _FAIL) + goto exit; rtw_init_hwxmits(pxmitpriv->hwxmits, pxmitpriv->hwxmit_entry); for (i = 0; i < 4; i++) { @@ -2144,7 +2146,7 @@ exit: return res; } -void rtw_alloc_hwxmits(struct adapter *padapter) +s32 rtw_alloc_hwxmits(struct adapter *padapter) { struct hw_xmit *hwxmits; struct xmit_priv *pxmitpriv = &padapter->xmitpriv; @@ -2155,10 +2157,8 @@ void rtw_alloc_hwxmits(struct adapter *padapter) pxmitpriv->hwxmits = rtw_zmalloc(sizeof(struct hw_xmit) * pxmitpriv->hwxmit_entry); - if (pxmitpriv->hwxmits == NULL) { - DBG_871X("alloc hwxmits fail!...\n"); - return; - } + if (!pxmitpriv->hwxmits) + return _FAIL; hwxmits = pxmitpriv->hwxmits; @@ -2204,7 +2204,7 @@ void rtw_alloc_hwxmits(struct adapter *padapter) } - + return _SUCCESS; } void rtw_free_hwxmits(struct adapter *padapter) diff --git a/drivers/staging/rtl8723bs/include/rtw_xmit.h b/drivers/staging/rtl8723bs/include/rtw_xmit.h index 1b38b9182b31..37f42b2f22f1 100644 --- a/drivers/staging/rtl8723bs/include/rtw_xmit.h +++ b/drivers/staging/rtl8723bs/include/rtw_xmit.h @@ -487,7 +487,7 @@ s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter); void _rtw_free_xmit_priv (struct xmit_priv *pxmitpriv); -void rtw_alloc_hwxmits(struct adapter *padapter); +s32 rtw_alloc_hwxmits(struct adapter *padapter); void rtw_free_hwxmits(struct adapter *padapter); -- cgit From d70d70aec9632679dd00dcc1b1e8b2517e2c7da0 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Wed, 20 Mar 2019 12:02:49 -0500 Subject: staging: rtlwifi: rtl8822b: fix to avoid potential NULL pointer dereference skb allocated via dev_alloc_skb can fail and return a NULL pointer. This patch avoids such a scenario and returns, consistent with other invocations. Signed-off-by: Aditya Pakki Reviewed-by: Mukesh Ojha Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtlwifi/rtl8822be/fw.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/rtlwifi/rtl8822be/fw.c b/drivers/staging/rtlwifi/rtl8822be/fw.c index f061dd1382aa..cf6b7a80b753 100644 --- a/drivers/staging/rtlwifi/rtl8822be/fw.c +++ b/drivers/staging/rtlwifi/rtl8822be/fw.c @@ -743,6 +743,8 @@ void rtl8822be_set_fw_rsvdpagepkt(struct ieee80211_hw *hw, bool b_dl_finished) u1_rsvd_page_loc, 3); skb = dev_alloc_skb(totalpacketlen); + if (!skb) + return; memcpy((u8 *)skb_put(skb, totalpacketlen), &reserved_page_packet, totalpacketlen); -- cgit From 22c971db7dd4b0ad8dd88e99c407f7a1f4231a2e Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 21 Mar 2019 09:26:38 +0300 Subject: staging: rtl8712: uninitialized memory in read_bbreg_hdl() Colin King reported a bug in read_bbreg_hdl(): memcpy(pcmd->rsp, (u8 *)&val, pcmd->rspsz); The problem is that "val" is uninitialized. This code is obviously not useful, but so far as I can tell "pcmd->cmdcode" is never GEN_CMD_CODE(_Read_BBREG) so it's not harmful either. For now the easiest fix is to just call r8712_free_cmd_obj() and return. Fixes: 2865d42c78a9 ("staging: r8712u: Add the new driver to the mainline kernel") Reported-by: Colin Ian King Signed-off-by: Dan Carpenter Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtl8712/rtl8712_cmd.c | 10 +--------- drivers/staging/rtl8712/rtl8712_cmd.h | 2 +- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/staging/rtl8712/rtl8712_cmd.c b/drivers/staging/rtl8712/rtl8712_cmd.c index 1920d02f7c9f..8c36acedf507 100644 --- a/drivers/staging/rtl8712/rtl8712_cmd.c +++ b/drivers/staging/rtl8712/rtl8712_cmd.c @@ -147,17 +147,9 @@ static u8 write_macreg_hdl(struct _adapter *padapter, u8 *pbuf) static u8 read_bbreg_hdl(struct _adapter *padapter, u8 *pbuf) { - u32 val; - void (*pcmd_callback)(struct _adapter *dev, struct cmd_obj *pcmd); struct cmd_obj *pcmd = (struct cmd_obj *)pbuf; - if (pcmd->rsp && pcmd->rspsz > 0) - memcpy(pcmd->rsp, (u8 *)&val, pcmd->rspsz); - pcmd_callback = cmd_callback[pcmd->cmdcode].callback; - if (!pcmd_callback) - r8712_free_cmd_obj(pcmd); - else - pcmd_callback(padapter, pcmd); + r8712_free_cmd_obj(pcmd); return H2C_SUCCESS; } diff --git a/drivers/staging/rtl8712/rtl8712_cmd.h b/drivers/staging/rtl8712/rtl8712_cmd.h index 92fb77666d44..1ef86b8c592f 100644 --- a/drivers/staging/rtl8712/rtl8712_cmd.h +++ b/drivers/staging/rtl8712/rtl8712_cmd.h @@ -140,7 +140,7 @@ enum rtl8712_h2c_cmd { static struct _cmd_callback cmd_callback[] = { {GEN_CMD_CODE(_Read_MACREG), NULL}, /*0*/ {GEN_CMD_CODE(_Write_MACREG), NULL}, - {GEN_CMD_CODE(_Read_BBREG), &r8712_getbbrfreg_cmdrsp_callback}, + {GEN_CMD_CODE(_Read_BBREG), NULL}, {GEN_CMD_CODE(_Write_BBREG), NULL}, {GEN_CMD_CODE(_Read_RFREG), &r8712_getbbrfreg_cmdrsp_callback}, {GEN_CMD_CODE(_Write_RFREG), NULL}, /*5*/ -- cgit From 6a8ca24590a2136921439b376c926c11a6effc0e Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Wed, 20 Mar 2019 10:42:32 -0500 Subject: staging: rtlwifi: Fix potential NULL pointer dereference of kzalloc phydm.internal is allocated using kzalloc which is used multiple times without a check for NULL pointer. This patch avoids such a scenario by returning 0, consistent with the failure case. Signed-off-by: Aditya Pakki Reviewed-by: Mukesh Ojha Signed-off-by: Greg Kroah-Hartman --- drivers/staging/rtlwifi/phydm/rtl_phydm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/staging/rtlwifi/phydm/rtl_phydm.c b/drivers/staging/rtlwifi/phydm/rtl_phydm.c index 9930ed954abb..4cc77b2016e1 100644 --- a/drivers/staging/rtlwifi/phydm/rtl_phydm.c +++ b/drivers/staging/rtlwifi/phydm/rtl_phydm.c @@ -180,6 +180,8 @@ static int rtl_phydm_init_priv(struct rtl_priv *rtlpriv, rtlpriv->phydm.internal = kzalloc(sizeof(struct phy_dm_struct), GFP_KERNEL); + if (!rtlpriv->phydm.internal) + return 0; _rtl_phydm_init_com_info(rtlpriv, ic, params); -- cgit From 2b77158ffa92b820a0c5da9a3c6ead7aa069c71c Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Sun, 17 Mar 2019 12:58:25 +0300 Subject: mmc: mxcmmc: "Revert mmc: mxcmmc: handle highmem pages" This reverts commit b189e7589f6d3411e85c6b7ae6eef158f08f388f. Unable to handle kernel paging request at virtual address c8358000 pgd = efa405c3 [c8358000] *pgd=00000000 Internal error: Oops: 805 [#1] PREEMPT ARM CPU: 0 PID: 711 Comm: kworker/0:2 Not tainted 4.20.0+ #30 Hardware name: Freescale i.MX27 (Device Tree Support) Workqueue: events mxcmci_datawork PC is at mxcmci_datawork+0xbc/0x2ac LR is at mxcmci_datawork+0xac/0x2ac pc : [] lr : [] psr: 60000013 sp : c6c93f08 ip : 24004180 fp : 00000008 r10: c8358000 r9 : c78b3e24 r8 : c6c92000 r7 : 00000000 r6 : c7bb8680 r5 : c7bb86d4 r4 : c78b3de0 r3 : 00002502 r2 : c090b2e0 r1 : 00000880 r0 : 00000000 Flags: nZCv IRQs on FIQs on Mode SVC_32 ISA ARM Segment user Control: 0005317f Table: a68a8000 DAC: 00000055 Process kworker/0:2 (pid: 711, stack limit = 0x389543bc) Stack: (0xc6c93f08 to 0xc6c94000) 3f00: c7bb86d4 00000000 00000000 c6cbfde0 c7bb86d4 c7ee4200 3f20: 00000000 c0907ea8 00000000 c7bb86d8 c0907ea8 c012077c c6cbfde0 c7bb86d4 3f40: c6cbfde0 c6c92000 c6cbfdf4 c09280ba c0907ea8 c090b2e0 c0907ebc c0120c18 3f60: c6cbfde0 00000000 00000000 c6cbb580 c7ba7c40 c7837edc c6cbb598 00000000 3f80: c6cbfde0 c01208f8 00000000 c01254fc c7ba7c40 c0125400 00000000 00000000 3fa0: 00000000 00000000 00000000 c01010d0 00000000 00000000 00000000 00000000 3fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 3fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000 [] (mxcmci_datawork) from [] (process_one_work+0x1f0/0x338) [] (process_one_work) from [] (worker_thread+0x320/0x474) [] (worker_thread) from [] (kthread+0xfc/0x118) [] (kthread) from [] (ret_from_fork+0x14/0x24) Exception stack(0xc6c93fb0 to 0xc6c93ff8) 3fa0: 00000000 00000000 00000000 00000000 3fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 3fe0: 00000000 00000000 00000000 00000000 00000013 00000000 Code: e3500000 1a000059 e5153050 e5933038 (e48a3004) ---[ end trace 54ca629b75f0e737 ]--- note: kworker/0:2[711] exited with preempt_count 1 Signed-off-by: Alexander Shiyan Fixes: b189e7589f6d ("mmc: mxcmmc: handle highmem pages") Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/mxcmmc.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c index d54612257b06..45f7b9b53d48 100644 --- a/drivers/mmc/host/mxcmmc.c +++ b/drivers/mmc/host/mxcmmc.c @@ -290,11 +290,8 @@ static void mxcmci_swap_buffers(struct mmc_data *data) struct scatterlist *sg; int i; - for_each_sg(data->sg, sg, data->sg_len, i) { - void *buf = kmap_atomic(sg_page(sg) + sg->offset); - buffer_swap32(buf, sg->length); - kunmap_atomic(buf); - } + for_each_sg(data->sg, sg, data->sg_len, i) + buffer_swap32(sg_virt(sg), sg->length); } #else static inline void mxcmci_swap_buffers(struct mmc_data *data) {} @@ -611,7 +608,6 @@ static int mxcmci_transfer_data(struct mxcmci_host *host) { struct mmc_data *data = host->req->data; struct scatterlist *sg; - void *buf; int stat, i; host->data = data; @@ -619,18 +615,14 @@ static int mxcmci_transfer_data(struct mxcmci_host *host) if (data->flags & MMC_DATA_READ) { for_each_sg(data->sg, sg, data->sg_len, i) { - buf = kmap_atomic(sg_page(sg) + sg->offset); - stat = mxcmci_pull(host, buf, sg->length); - kunmap(buf); + stat = mxcmci_pull(host, sg_virt(sg), sg->length); if (stat) return stat; host->datasize += sg->length; } } else { for_each_sg(data->sg, sg, data->sg_len, i) { - buf = kmap_atomic(sg_page(sg) + sg->offset); - stat = mxcmci_push(host, buf, sg->length); - kunmap(buf); + stat = mxcmci_push(host, sg_virt(sg), sg->length); if (stat) return stat; host->datasize += sg->length; -- cgit From 92edf8df0ff2ae86cc632eeca0e651fd8431d40d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 21 Mar 2019 15:24:33 +1100 Subject: powerpc/security: Fix spectre_v2 reporting When I updated the spectre_v2 reporting to handle software count cache flush I got the logic wrong when there's no software count cache enabled at all. The result is that on systems with the software count cache flush disabled we print: Mitigation: Indirect branch cache disabled, Software count cache flush Which correctly indicates that the count cache is disabled, but incorrectly says the software count cache flush is enabled. The root of the problem is that we are trying to handle all combinations of options. But we know now that we only expect to see the software count cache flush enabled if the other options are false. So split the two cases, which simplifies the logic and fixes the bug. We were also missing a space before "(hardware accelerated)". The result is we see one of: Mitigation: Indirect branch serialisation (kernel only) Mitigation: Indirect branch cache disabled Mitigation: Software count cache flush Mitigation: Software count cache flush (hardware accelerated) Fixes: ee13cb249fab ("powerpc/64s: Add support for software count cache flush") Cc: stable@vger.kernel.org # v4.19+ Signed-off-by: Michael Ellerman Reviewed-by: Michael Neuling Reviewed-by: Diana Craciun Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/security.c | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index 9b8631533e02..b33bafb8fcea 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -190,29 +190,22 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED); ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED); - if (bcs || ccd || count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) { - bool comma = false; + if (bcs || ccd) { seq_buf_printf(&s, "Mitigation: "); - if (bcs) { + if (bcs) seq_buf_printf(&s, "Indirect branch serialisation (kernel only)"); - comma = true; - } - if (ccd) { - if (comma) - seq_buf_printf(&s, ", "); - seq_buf_printf(&s, "Indirect branch cache disabled"); - comma = true; - } - - if (comma) + if (bcs && ccd) seq_buf_printf(&s, ", "); - seq_buf_printf(&s, "Software count cache flush"); + if (ccd) + seq_buf_printf(&s, "Indirect branch cache disabled"); + } else if (count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) { + seq_buf_printf(&s, "Mitigation: Software count cache flush"); if (count_cache_flush_type == COUNT_CACHE_FLUSH_HW) - seq_buf_printf(&s, "(hardware accelerated)"); + seq_buf_printf(&s, " (hardware accelerated)"); } else if (btb_flush_enabled) { seq_buf_printf(&s, "Mitigation: Branch predictor state flush"); } else { -- cgit From 031d2ccc16775c9531800979069e141fbedeb2f7 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Thu, 21 Mar 2019 11:45:44 +0530 Subject: mmc: sdhci-omap: Set caps2 to indicate no physical write protect pin After commit 6d5cd068ee59fba ("mmc: sdhci: use WP GPIO in sdhci_check_ro()") and commit 39ee32ce486756f ("mmc: sdhci-omap: drop ->get_ro() implementation"), sdhci-omap relied on SDHCI_PRESENT_STATE to check if the card is read-only, if wp-gpios is not populated in device tree. However SDHCI_PRESENT_STATE in sdhci-omap does not have correct read-only state. sdhci-omap can be used by platforms with both micro SD slot and standard SD slot with physical write protect pin (using GPIO). Set caps2 to MMC_CAP2_NO_WRITE_PROTECT based on if wp-gpios property is populated or not. This fix is required since existing device-tree node doesn't have "disable-wp" property and to preserve old-dt compatibility. Fixes: 6d5cd068ee59fba ("mmc: sdhci: use WP GPIO in sdhci_check_ro()") Fixes: 39ee32ce486756f ("mmc: sdhci-omap: drop ->get_ro() implementation") Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci-omap.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/mmc/host/sdhci-omap.c b/drivers/mmc/host/sdhci-omap.c index b1a66ca3821a..5bbed477c9b1 100644 --- a/drivers/mmc/host/sdhci-omap.c +++ b/drivers/mmc/host/sdhci-omap.c @@ -1056,6 +1056,9 @@ static int sdhci_omap_probe(struct platform_device *pdev) mmc->f_max = 48000000; } + if (!mmc_can_gpio_ro(mmc)) + mmc->caps2 |= MMC_CAP2_NO_WRITE_PROTECT; + pltfm_host->clk = devm_clk_get(dev, "fck"); if (IS_ERR(pltfm_host->clk)) { ret = PTR_ERR(pltfm_host->clk); -- cgit From 5ea47691bd99e1100707ec63364aff72324e2af4 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Wed, 20 Mar 2019 14:36:53 +0800 Subject: mmc: alcor: fix DMA reads Setting max_blk_count to 1 here was causing the mmc block layer to always use the MMC_READ_SINGLE_BLOCK command here, which the driver does not DMA-accelerate. Drop the max_blk_ settings here. The mmc host defaults suffice, along with the max_segs and max_seg_size settings, which I have now documented in more detail. Now each MMC command reads 4 512-byte blocks, using DMA instead of PIO. On my SD card, this increases read performance (measured with dd) from 167kb/sec to 4.6mb/sec. Link: http://lkml.kernel.org/r/CAD8Lp47L5T3jnAjBiPs1cQ+yFA3L6LJtgFvMETnBrY63-Zdi2g@mail.gmail.com Signed-off-by: Daniel Drake Reviewed-by: Oleksij Rempel Fixes: c5413ad815a6 ("mmc: add new Alcor Micro Cardreader SD/MMC driver") Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/alcor.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/drivers/mmc/host/alcor.c b/drivers/mmc/host/alcor.c index c712b7deb3a9..82a97866e0cf 100644 --- a/drivers/mmc/host/alcor.c +++ b/drivers/mmc/host/alcor.c @@ -1044,14 +1044,27 @@ static void alcor_init_mmc(struct alcor_sdmmc_host *host) mmc->caps2 = MMC_CAP2_NO_SDIO; mmc->ops = &alcor_sdc_ops; - /* Hardware cannot do scatter lists */ + /* The hardware does DMA data transfer of 4096 bytes to/from a single + * buffer address. Scatterlists are not supported, but upon DMA + * completion (signalled via IRQ), the original vendor driver does + * then immediately set up another DMA transfer of the next 4096 + * bytes. + * + * This means that we need to handle the I/O in 4096 byte chunks. + * Lacking a way to limit the sglist entries to 4096 bytes, we instead + * impose that only one segment is provided, with maximum size 4096, + * which also happens to be the minimum size. This means that the + * single-entry sglist handled by this driver can be handed directly + * to the hardware, nice and simple. + * + * Unfortunately though, that means we only do 4096 bytes I/O per + * MMC command. A future improvement would be to make the driver + * accept sg lists and entries of any size, and simply iterate + * through them 4096 bytes at a time. + */ mmc->max_segs = AU6601_MAX_DMA_SEGMENTS; mmc->max_seg_size = AU6601_MAX_DMA_BLOCK_SIZE; - - mmc->max_blk_size = mmc->max_seg_size; - mmc->max_blk_count = mmc->max_segs; - - mmc->max_req_size = mmc->max_seg_size * mmc->max_segs; + mmc->max_req_size = mmc->max_seg_size; } static int alcor_pci_sdmmc_drv_probe(struct platform_device *pdev) -- cgit From c9a9497ccef205ed4ed2e247011382627876d831 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 19 Mar 2019 11:12:59 +0100 Subject: mmc: renesas_sdhi: limit block count to 16 bit for old revisions R-Car Gen2 has two different SDHI incarnations in the same chip. The older one does not support the recently introduced 32 bit register access to the block count register. Make sure we use this feature only after the first known version. Thanks to the Renesas Testing team for this bug report! Fixes: 5603731a15ef ("mmc: tmio: fix access width of Block Count Register") Reported-by: Yoshihiro Shimoda Signed-off-by: Wolfram Sang Reviewed-by: Simon Horman Tested-by: Phong Hoang Cc: stable@vger.kernel.org Signed-off-by: Ulf Hansson --- drivers/mmc/host/renesas_sdhi_core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/mmc/host/renesas_sdhi_core.c b/drivers/mmc/host/renesas_sdhi_core.c index 71e13844df6c..8742e27e4e8b 100644 --- a/drivers/mmc/host/renesas_sdhi_core.c +++ b/drivers/mmc/host/renesas_sdhi_core.c @@ -641,6 +641,7 @@ int renesas_sdhi_probe(struct platform_device *pdev, struct renesas_sdhi *priv; struct resource *res; int irq, ret, i; + u16 ver; of_data = of_device_get_match_data(&pdev->dev); @@ -773,12 +774,17 @@ int renesas_sdhi_probe(struct platform_device *pdev, if (ret) goto efree; + ver = sd_ctrl_read16(host, CTL_VERSION); + /* GEN2_SDR104 is first known SDHI to use 32bit block count */ + if (ver < SDHI_VER_GEN2_SDR104 && mmc_data->max_blk_count > U16_MAX) + mmc_data->max_blk_count = U16_MAX; + ret = tmio_mmc_host_probe(host); if (ret < 0) goto edisclk; /* One Gen2 SDHI incarnation does NOT have a CBSY bit */ - if (sd_ctrl_read16(host, CTL_VERSION) == SDHI_VER_GEN2_SDR50) + if (ver == SDHI_VER_GEN2_SDR50) mmc_data->flags &= ~TMIO_MMC_HAVE_CBSY; /* Enable tuning iff we have an SCC and a supported mode */ -- cgit From 551417af91b163bd697eb50b3601adae2177c28a Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 18 Mar 2019 14:51:23 +0800 Subject: genirq: Fix typo in comment of IRQD_MOVE_PCNTXT Signed-off-by: Peter Xu Signed-off-by: Thomas Gleixner Cc: Marc Zyngier Cc: Dou Liyang Cc: Julien Thierry Link: https://lkml.kernel.org/r/20190318065123.11862-1-peterx@redhat.com --- include/linux/irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/irq.h b/include/linux/irq.h index d6160d479b14..7ae8de5ad0f2 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -195,7 +195,7 @@ struct irq_data { * IRQD_LEVEL - Interrupt is level triggered * IRQD_WAKEUP_STATE - Interrupt is configured for wakeup * from suspend - * IRDQ_MOVE_PCNTXT - Interrupt can be moved in process + * IRQD_MOVE_PCNTXT - Interrupt can be moved in process * context * IRQD_IRQ_DISABLED - Disabled state of the interrupt * IRQD_IRQ_MASKED - Masked state of the interrupt -- cgit From 82efcab3b9f3ef59e9713237c6e3c05c3a95c1ae Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 11 Mar 2019 16:02:55 -0700 Subject: workqueue: Only unregister a registered lockdep key The recent change to prevent use after free and a memory leak introduced an unconditional call to wq_unregister_lockdep() in the error handling path. If the lockdep key had not been registered yet, then the lockdep core emits a warning. Only call wq_unregister_lockdep() if wq_register_lockdep() has been called first. Fixes: 009bb421b6ce ("workqueue, lockdep: Fix an alloc_workqueue() error path") Reported-by: syzbot+be0c198232f86389c3dd@syzkaller.appspotmail.com Signed-off-by: Bart Van Assche Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Tejun Heo Cc: Qian Cai Link: https://lkml.kernel.org/r/20190311230255.176081-1-bvanassche@acm.org --- kernel/workqueue.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 4026d1871407..ddee541ea97a 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4266,7 +4266,7 @@ struct workqueue_struct *alloc_workqueue(const char *fmt, INIT_LIST_HEAD(&wq->list); if (alloc_and_link_pwqs(wq) < 0) - goto err_free_wq; + goto err_unreg_lockdep; if (wq_online && init_rescuer(wq) < 0) goto err_destroy; @@ -4292,9 +4292,10 @@ struct workqueue_struct *alloc_workqueue(const char *fmt, return wq; -err_free_wq: +err_unreg_lockdep: wq_unregister_lockdep(wq); wq_free_lockdep(wq); +err_free_wq: free_workqueue_attrs(wq->unbound_attrs); kfree(wq); return NULL; -- cgit From 0c671812f152b628bd87c0af49da032cc2a2c319 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 18 Mar 2019 19:09:38 -0500 Subject: objtool: Move objtool_file struct off the stack Objtool uses over 512k of stack, thanks to the hash table embedded in the objtool_file struct. This causes an unnecessarily large stack allocation and breaks users with low stack limits. Move the struct off the stack. Fixes: 042ba73fe7eb ("objtool: Add several performance improvements") Reported-by: Vassili Karpov Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/df92dcbc4b84b02ffa252f46876df125fb56e2d7.1552954176.git.jpoimboe@redhat.com --- tools/objtool/check.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 0414a0d52262..5dde107083c6 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2184,9 +2184,10 @@ static void cleanup(struct objtool_file *file) elf_close(file->elf); } +static struct objtool_file file; + int check(const char *_objname, bool orc) { - struct objtool_file file; int ret, warnings = 0; objname = _objname; -- cgit From dc3173c7067ebbac44ed4a5c3636dc0ff27f981b Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 20 Mar 2019 22:22:20 +0800 Subject: irqchip/brcmstb-l2: Make two init functions static Fix sparse warnings: drivers/irqchip/irq-brcmstb-l2.c:278:12: warning: symbol 'brcmstb_l2_edge_intc_of_init' was not declared. Should it be static? drivers/irqchip/irq-brcmstb-l2.c:285:12: warning: symbol 'brcmstb_l2_lvl_intc_of_init' was not declared. Should it be static? Signed-off-by: YueHaibing Reviewed-by: Florian Fainelli Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-brcmstb-l2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/irqchip/irq-brcmstb-l2.c b/drivers/irqchip/irq-brcmstb-l2.c index 83364fedbf0a..5e4ca139e4ea 100644 --- a/drivers/irqchip/irq-brcmstb-l2.c +++ b/drivers/irqchip/irq-brcmstb-l2.c @@ -275,14 +275,14 @@ out_free: return ret; } -int __init brcmstb_l2_edge_intc_of_init(struct device_node *np, +static int __init brcmstb_l2_edge_intc_of_init(struct device_node *np, struct device_node *parent) { return brcmstb_l2_intc_of_init(np, parent, &l2_edge_intc_init); } IRQCHIP_DECLARE(brcmstb_l2_intc, "brcm,l2-intc", brcmstb_l2_edge_intc_of_init); -int __init brcmstb_l2_lvl_intc_of_init(struct device_node *np, +static int __init brcmstb_l2_lvl_intc_of_init(struct device_node *np, struct device_node *parent) { return brcmstb_l2_intc_of_init(np, parent, &l2_lvl_intc_init); -- cgit From 096048cb120d5318b3a9a7c1e062a4b11c0e80ab Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Wed, 20 Mar 2019 21:40:27 +0800 Subject: irqchip/mmp: Make mmp_irq_domain_ops static Fix sparse warning: drivers/irqchip/irq-mmp.c:182:29: warning: symbol 'mmp_irq_domain_ops' was not declared. Should it be static? Signed-off-by: YueHaibing Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-mmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-mmp.c b/drivers/irqchip/irq-mmp.c index 25f32e1d7764..85d1c4d49572 100644 --- a/drivers/irqchip/irq-mmp.c +++ b/drivers/irqchip/irq-mmp.c @@ -176,7 +176,7 @@ static int mmp_irq_domain_xlate(struct irq_domain *d, struct device_node *node, return 0; } -const struct irq_domain_ops mmp_irq_domain_ops = { +static const struct irq_domain_ops mmp_irq_domain_ops = { .map = mmp_irq_domain_map, .xlate = mmp_irq_domain_xlate, }; -- cgit From 24105bf4d10485143f8e26337cda8bcb7f6e6da5 Mon Sep 17 00:00:00 2001 From: Fabrizio Castro Date: Tue, 19 Mar 2019 11:02:01 +0000 Subject: dt-bindings: irqchip: renesas-irqc: Document r8a774c0 support Document RZ/G2E (R8A774C0) SoC bindings. Signed-off-by: Fabrizio Castro Reviewed-by: Geert Uytterhoeven Reviewed-by: Simon Horman Reviewed-by: Rob Herring Signed-off-by: Marc Zyngier --- Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt index 8de96a4fb2d5..f977ea7617f6 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt +++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.txt @@ -16,6 +16,7 @@ Required properties: - "renesas,irqc-r8a7793" (R-Car M2-N) - "renesas,irqc-r8a7794" (R-Car E2) - "renesas,intc-ex-r8a774a1" (RZ/G2M) + - "renesas,intc-ex-r8a774c0" (RZ/G2E) - "renesas,intc-ex-r8a7795" (R-Car H3) - "renesas,intc-ex-r8a7796" (R-Car M3-W) - "renesas,intc-ex-r8a77965" (R-Car M3-N) -- cgit From 0dda09666f50eae9c5b794dd89b1fd8a8d89d714 Mon Sep 17 00:00:00 2001 From: Fabien Dessenne Date: Thu, 7 Mar 2019 19:40:35 +0100 Subject: irqchip/stm32: Don't clear rising/falling config registers at init Falling and rising configuration and status registers are not banked. As they are shared with M4 co-processor, they should not be cleared at probe time, else M4 co-processor configuration will be lost. Fixes: f9fc1745501e ("irqchip/stm32: Add host and driver data structures") Signed-off-by: Loic Pallardy Signed-off-by: Fabien Dessenne Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-stm32-exti.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c index 6edfd4bfa169..dab37fbfab82 100644 --- a/drivers/irqchip/irq-stm32-exti.c +++ b/drivers/irqchip/irq-stm32-exti.c @@ -735,11 +735,6 @@ stm32_exti_chip_data *stm32_exti_chip_init(struct stm32_exti_host_data *h_data, */ writel_relaxed(0, base + stm32_bank->imr_ofst); writel_relaxed(0, base + stm32_bank->emr_ofst); - writel_relaxed(0, base + stm32_bank->rtsr_ofst); - writel_relaxed(0, base + stm32_bank->ftsr_ofst); - writel_relaxed(~0UL, base + stm32_bank->rpr_ofst); - if (stm32_bank->fpr_ofst != UNDEF_REG) - writel_relaxed(~0UL, base + stm32_bank->fpr_ofst); pr_info("%pOF: bank%d\n", h_data->node, bank_idx); -- cgit From 6a77623d78b307b34d4cf7886da6a907689bf388 Mon Sep 17 00:00:00 2001 From: Fabien Dessenne Date: Thu, 7 Mar 2019 19:40:36 +0100 Subject: irqchip/stm32: Don't set rising configuration registers at init The rising configuration status register (rtsr) is not banked. As it is shared with the co-processor, it should not be written at probe time, else the co-processor configuration will be lost. Fixes: f9fc1745501e ("irqchip/stm32: Add host and driver data structures") Signed-off-by: Fabien Dessenne Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-stm32-exti.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/irqchip/irq-stm32-exti.c b/drivers/irqchip/irq-stm32-exti.c index dab37fbfab82..6b19bffbad78 100644 --- a/drivers/irqchip/irq-stm32-exti.c +++ b/drivers/irqchip/irq-stm32-exti.c @@ -716,7 +716,6 @@ stm32_exti_chip_data *stm32_exti_chip_init(struct stm32_exti_host_data *h_data, const struct stm32_exti_bank *stm32_bank; struct stm32_exti_chip_data *chip_data; void __iomem *base = h_data->base; - u32 irqs_mask; stm32_bank = h_data->drv_data->exti_banks[bank_idx]; chip_data = &h_data->chips_data[bank_idx]; @@ -725,10 +724,6 @@ stm32_exti_chip_data *stm32_exti_chip_init(struct stm32_exti_host_data *h_data, raw_spin_lock_init(&chip_data->rlock); - /* Determine number of irqs supported */ - writel_relaxed(~0UL, base + stm32_bank->rtsr_ofst); - irqs_mask = readl_relaxed(base + stm32_bank->rtsr_ofst); - /* * This IP has no reset, so after hot reboot we should * clear registers to avoid residue -- cgit From fca269f201a8d9985c0a31fb60b15d4eb57cef80 Mon Sep 17 00:00:00 2001 From: Jianguo Chen Date: Wed, 20 Mar 2019 18:54:21 +0000 Subject: irqchip/mbigen: Don't clear eventid when freeing an MSI mbigen_write_msg clears eventid bits of a mbigen register when free a interrupt, because msi_domain_deactivate memset struct msg to zero. Then multiple mbigen pins with zero eventid will report the same interrupt number. The eventid clear call trace: free_irq __free_irq irq_shutdown irq_domain_deactivate_irq __irq_domain_deactivate_irq __irq_domain_deactivate_irq msi_domain_deactivate platform_msi_write_msg mbigen_write_msg Signed-off-by: Jianguo Chen [maz: massaged subject] Signed-off-by: Marc Zyngier --- drivers/irqchip/irq-mbigen.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index 567b29c47608..98b6e1d4b1a6 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -161,6 +161,9 @@ static void mbigen_write_msg(struct msi_desc *desc, struct msi_msg *msg) void __iomem *base = d->chip_data; u32 val; + if (!msg->address_lo && !msg->address_hi) + return; + base += get_mbigen_vec_reg(d->hwirq); val = readl_relaxed(base); -- cgit From 0803278b0b4d8eeb2b461fb698785df65a725d9e Mon Sep 17 00:00:00 2001 From: Xu Yu Date: Thu, 21 Mar 2019 18:00:35 +0800 Subject: bpf: do not restore dst_reg when cur_state is freed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Syzkaller hit 'KASAN: use-after-free Write in sanitize_ptr_alu' bug. Call trace: dump_stack+0xbf/0x12e print_address_description+0x6a/0x280 kasan_report+0x237/0x360 sanitize_ptr_alu+0x85a/0x8d0 adjust_ptr_min_max_vals+0x8f2/0x1ca0 adjust_reg_min_max_vals+0x8ed/0x22e0 do_check+0x1ca6/0x5d00 bpf_check+0x9ca/0x2570 bpf_prog_load+0xc91/0x1030 __se_sys_bpf+0x61e/0x1f00 do_syscall_64+0xc8/0x550 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fault injection trace:  kfree+0xea/0x290  free_func_state+0x4a/0x60  free_verifier_state+0x61/0xe0  push_stack+0x216/0x2f0 <- inject failslab  sanitize_ptr_alu+0x2b1/0x8d0  adjust_ptr_min_max_vals+0x8f2/0x1ca0  adjust_reg_min_max_vals+0x8ed/0x22e0  do_check+0x1ca6/0x5d00  bpf_check+0x9ca/0x2570  bpf_prog_load+0xc91/0x1030  __se_sys_bpf+0x61e/0x1f00  do_syscall_64+0xc8/0x550  entry_SYSCALL_64_after_hwframe+0x49/0xbe When kzalloc() fails in push_stack(), free_verifier_state() will free current verifier state. As push_stack() returns, dst_reg was restored if ptr_is_dst_reg is false. However, as member of the cur_state, dst_reg is also freed, and error occurs when dereferencing dst_reg. Simply fix it by testing ret of push_stack() before restoring dst_reg. Fixes: 979d63d50c0c ("bpf: prevent out of bounds speculation on pointer arithmetic") Signed-off-by: Xu Yu Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 5aa810882583..f19d5e04c69d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3381,7 +3381,7 @@ do_sim: *dst_reg = *ptr_reg; } ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true); - if (!ptr_is_dst_reg) + if (!ptr_is_dst_reg && ret) *dst_reg = tmp; return !ret ? -EFAULT : 0; } -- cgit From a9c640ac96e19b3966357ec9bb586edd2e1e74de Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 14 Mar 2019 15:14:57 -0700 Subject: x86/boot: Restrict header scope to make Clang happy The inclusion of was causing issue as the definition of __arch_hweight64 from arch/x86/include/asm/arch_hweight.h eventually gets included. The definition is problematic when compiled with -m16 (all code in arch/x86/boot/ is) as the "D" inline assembly constraint is rejected by both compilers when passed an argument of type long long (regardless of signedness, anything smaller is fine). Because GCC performs inlining before semantic analysis, and __arch_hweight64 is dead in this translation unit, GCC does not report any issues at compile time. Clang does the semantic analysis in the front end, before inlining (run in the middle) can determine the code is dead. I consider this another case of PR33587, which I think we can do more work to solve. It turns out that arch/x86/boot/string.c doesn't actually need linux/kernel.h, simply linux/limits.h and linux/compiler.h. Suggested-by: Stephen Rothwell Signed-off-by: Nick Desaulniers Signed-off-by: Thomas Gleixner Tested-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Cc: bp@alien8.de Cc: niravd@google.com Cc: "H. Peter Anvin" Cc: Chao Fan Cc: Uros Bizjak Link: https://bugs.llvm.org/show_bug.cgi?id=33587 Link: https://github.com/ClangBuiltLinux/linux/issues/347 Link: https://lkml.kernel.org/r/20190314221458.83047-1-ndesaulniers@google.com --- arch/x86/boot/string.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 315a67b8896b..90154df8f125 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -13,8 +13,9 @@ */ #include -#include +#include #include +#include #include #include "ctype.h" #include "string.h" -- cgit From 725e29db8cb9058976559bc3239c97ef7db40eea Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 14 Mar 2019 23:08:38 +0000 Subject: x86/lib: Fix indentation issue, remove extra tab The increment of buff is indented one level too deeply, clean this up by removing a tab. Signed-off-by: Colin Ian King Signed-off-by: Thomas Gleixner Cc: Borislav Petkov Cc: "H . Peter Anvin" Cc: kernel-janitors@vger.kernel.org Link: https://lkml.kernel.org/r/20190314230838.18256-1-colin.king@canonical.com --- arch/x86/lib/csum-partial_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/lib/csum-partial_64.c b/arch/x86/lib/csum-partial_64.c index 9baca3e054be..e7925d668b68 100644 --- a/arch/x86/lib/csum-partial_64.c +++ b/arch/x86/lib/csum-partial_64.c @@ -94,7 +94,7 @@ static unsigned do_csum(const unsigned char *buff, unsigned len) : "m" (*(unsigned long *)buff), "r" (zero), "0" (result)); --count; - buff += 8; + buff += 8; } result = add32_with_carry(result>>32, result&0xffffffff); -- cgit From 2e84f116afca3719c9d0a1a78b47b48f75fd5724 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Mon, 18 Mar 2019 21:19:56 -0500 Subject: x86/hpet: Prevent potential NULL pointer dereference hpet_virt_address may be NULL when ioremap_nocache fail, but the code lacks a check. Add a check to prevent NULL pointer dereference. Signed-off-by: Aditya Pakki Signed-off-by: Thomas Gleixner Cc: kjlu@umn.edu Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Kees Cook Cc: Joe Perches Cc: Nicolai Stange Cc: Roland Dreier Link: https://lkml.kernel.org/r/20190319021958.17275-1-pakki001@umn.edu --- arch/x86/kernel/hpet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index dfd3aca82c61..fb32925a2e62 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -905,6 +905,8 @@ int __init hpet_enable(void) return 0; hpet_set_mapping(); + if (!hpet_virt_address) + return 0; /* * Read the period and check for a sane value: -- cgit From 534c89c22e26b183d838294f0937ee092c82ad3a Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 14 Mar 2019 00:46:51 -0500 Subject: x86/hyperv: Prevent potential NULL pointer dereference The page allocation in hv_cpu_init() can fail, but the code does not have a check for that. Add a check and return -ENOMEM when the allocation fails. [ tglx: Massaged changelog ] Signed-off-by: Kangjie Lu Signed-off-by: Thomas Gleixner Reviewed-by: Mukesh Ojha Acked-by: "K. Y. Srinivasan" Cc: pakki001@umn.edu Cc: Haiyang Zhang Cc: Stephen Hemminger Cc: Sasha Levin Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: linux-hyperv@vger.kernel.org Link: https://lkml.kernel.org/r/20190314054651.1315-1-kjlu@umn.edu --- arch/x86/hyperv/hv_init.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 6461a16b4559..e4ba467a9fc6 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -103,9 +103,13 @@ static int hv_cpu_init(unsigned int cpu) u64 msr_vp_index; struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()]; void **input_arg; + struct page *pg; input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); - *input_arg = page_address(alloc_page(GFP_KERNEL)); + pg = alloc_page(GFP_KERNEL); + if (unlikely(!pg)) + return -ENOMEM; + *input_arg = page_address(pg); hv_get_vp_index(msr_vp_index); -- cgit From 9bd681251b7c1db1c6cfe29a72c5ea1b1c0ba022 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 13 Mar 2019 12:00:22 +0100 Subject: x86/microcode: Announce reload operation's completion By popular demand, issue a single line to dmesg after the reload operation completes to let the user know that a reload has at least been attempted. Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20190313110022.8229-1-bp@alien8.de --- arch/x86/kernel/cpu/microcode/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 97f9ada9ceda..5260185cbf7b 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -608,6 +608,8 @@ static int microcode_reload_late(void) if (ret > 0) microcode_check(); + pr_info("Reload completed, microcode revision: 0x%x\n", boot_cpu_data.microcode); + return ret; } -- cgit From 18fb053f9b827bd98cfc64f2a35df8ab19745a1d Mon Sep 17 00:00:00 2001 From: Matthew Whitehead Date: Thu, 14 Mar 2019 16:46:00 -0400 Subject: x86/cpu/cyrix: Use correct macros for Cyrix calls on Geode processors There are comments in processor-cyrix.h advising you to _not_ make calls using the deprecated macros in this style: setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); This is because it expands the macro into a non-functioning calling sequence. The calling order must be: outb(CX86_CCR2, 0x22); inb(0x23); From the comments: * When using the old macros a line like * setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); * gets expanded to: * do { * outb((CX86_CCR2), 0x22); * outb((({ * outb((CX86_CCR2), 0x22); * inb(0x23); * }) | 0x88), 0x23); * } while (0); The new macros fix this problem, so use them instead. Tested on an actual Geode processor. Signed-off-by: Matthew Whitehead Signed-off-by: Thomas Gleixner Cc: luto@kernel.org Link: https://lkml.kernel.org/r/1552596361-8967-2-git-send-email-tedheadster@gmail.com --- arch/x86/kernel/cpu/cyrix.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index d12226f60168..1d9b8aaea06c 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -124,7 +124,7 @@ static void set_cx86_reorder(void) setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* Load/Store Serialize to mem access disable (=reorder it) */ - setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80); + setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80); /* set load/store serialize from 1GB to 4GB */ ccr3 |= 0xe0; setCx86(CX86_CCR3, ccr3); @@ -135,11 +135,11 @@ static void set_cx86_memwb(void) pr_info("Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); /* CCR2 bit 2: unlock NW bit */ - setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04); + setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); /* set 'Not Write-through' */ write_cr0(read_cr0() | X86_CR0_NW); /* CCR2 bit 2: lock NW bit and set WT1 */ - setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14); + setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); } /* @@ -153,14 +153,14 @@ static void geode_configure(void) local_irq_save(flags); /* Suspend on halt power saving and enable #SUSP pin */ - setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88); + setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* FPU fast, DTE cache, Mem bypass */ - setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38); + setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ set_cx86_memwb(); @@ -296,7 +296,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) /* GXm supports extended cpuid levels 'ala' AMD */ if (c->cpuid_level == 2) { /* Enable cxMMX extensions (GX1 Datasheet 54) */ - setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1); + setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); /* * GXm : 0x30 ... 0x5f GXm datasheet 51 @@ -319,7 +319,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) if (dir1 > 7) { dir0_msn++; /* M II */ /* Enable MMX extensions (App note 108) */ - setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1); + setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); } else { /* A 6x86MX - it has the bug. */ set_cpu_bug(c, X86_BUG_COMA); -- cgit From 0f4d3aa761b71cd6984330baca1e18bf0590e441 Mon Sep 17 00:00:00 2001 From: Matthew Whitehead Date: Thu, 14 Mar 2019 16:46:01 -0400 Subject: x86/cpu/cyrix: Remove {get,set}Cx86_old macros used for Cyrix processors The getCx86_old() and setCx86_old() macros have been replaced with correctly working getCx86() and setCx86(), so remove these unused macros. Signed-off-by: Matthew Whitehead Signed-off-by: Thomas Gleixner Cc: luto@kernel.org Link: https://lkml.kernel.org/r/1552596361-8967-3-git-send-email-tedheadster@gmail.com --- arch/x86/include/asm/processor-cyrix.h | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/arch/x86/include/asm/processor-cyrix.h b/arch/x86/include/asm/processor-cyrix.h index aaedd73ea2c6..df700a6cc869 100644 --- a/arch/x86/include/asm/processor-cyrix.h +++ b/arch/x86/include/asm/processor-cyrix.h @@ -3,19 +3,6 @@ * NSC/Cyrix CPU indexed register access. Must be inlined instead of * macros to ensure correct access ordering * Access order is always 0x22 (=offset), 0x23 (=value) - * - * When using the old macros a line like - * setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); - * gets expanded to: - * do { - * outb((CX86_CCR2), 0x22); - * outb((({ - * outb((CX86_CCR2), 0x22); - * inb(0x23); - * }) | 0x88), 0x23); - * } while (0); - * - * which in fact violates the access order (= 0x22, 0x22, 0x23, 0x23). */ static inline u8 getCx86(u8 reg) @@ -29,11 +16,3 @@ static inline void setCx86(u8 reg, u8 data) outb(reg, 0x22); outb(data, 0x23); } - -#define getCx86_old(reg) ({ outb((reg), 0x22); inb(0x23); }) - -#define setCx86_old(reg, data) do { \ - outb((reg), 0x22); \ - outb((data), 0x23); \ -} while (0) - -- cgit From 2b1d9c8f87235f593826b9cf46ec10247741fff9 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 20 Mar 2019 16:15:24 -0500 Subject: ALSA: rawmidi: Fix potential Spectre v1 vulnerability info->stream is indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: sound/core/rawmidi.c:604 __snd_rawmidi_info_select() warn: potential spectre issue 'rmidi->streams' [r] (local cap) Fix this by sanitizing info->stream before using it to index rmidi->streams. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://lore.kernel.org/lkml/20180423164740.GY17484@dhcp22.suse.cz/ Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Takashi Iwai --- sound/core/rawmidi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index ee601d7f0926..c0690d1ecd55 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -601,6 +602,7 @@ static int __snd_rawmidi_info_select(struct snd_card *card, return -ENXIO; if (info->stream < 0 || info->stream > 1) return -EINVAL; + info->stream = array_index_nospec(info->stream, 2); pstr = &rmidi->streams[info->stream]; if (pstr->substream_count == 0) return -ENOENT; -- cgit From c709f14f0616482b67f9fbcb965e1493a03ff30b Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 20 Mar 2019 18:42:01 -0500 Subject: ALSA: seq: oss: Fix Spectre v1 vulnerability dev is indirectly controlled by user-space, hence leading to a potential exploitation of the Spectre variant 1 vulnerability. This issue was detected with the help of Smatch: sound/core/seq/oss/seq_oss_synth.c:626 snd_seq_oss_synth_make_info() warn: potential spectre issue 'dp->synths' [w] (local cap) Fix this by sanitizing dev before using it to index dp->synths. Notice that given that speculation windows are large, the policy is to kill the speculation on the first load and not worry if it can be completed with a dependent load/store [1]. [1] https://lore.kernel.org/lkml/20180423164740.GY17484@dhcp22.suse.cz/ Cc: stable@vger.kernel.org Signed-off-by: Gustavo A. R. Silva Signed-off-by: Takashi Iwai --- sound/core/seq/oss/seq_oss_synth.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sound/core/seq/oss/seq_oss_synth.c b/sound/core/seq/oss/seq_oss_synth.c index 278ebb993122..c93945917235 100644 --- a/sound/core/seq/oss/seq_oss_synth.c +++ b/sound/core/seq/oss/seq_oss_synth.c @@ -617,13 +617,14 @@ int snd_seq_oss_synth_make_info(struct seq_oss_devinfo *dp, int dev, struct synth_info *inf) { struct seq_oss_synth *rec; + struct seq_oss_synthinfo *info = get_synthinfo_nospec(dp, dev); - if (dev < 0 || dev >= dp->max_synthdev) + if (!info) return -ENXIO; - if (dp->synths[dev].is_midi) { + if (info->is_midi) { struct midi_info minf; - snd_seq_oss_midi_make_info(dp, dp->synths[dev].midi_mapped, &minf); + snd_seq_oss_midi_make_info(dp, info->midi_mapped, &minf); inf->synth_type = SYNTH_TYPE_MIDI; inf->synth_subtype = 0; inf->nr_voices = 16; -- cgit From 2733ccebf4a937a0858e7d05a4a003b89715033f Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Thu, 21 Mar 2019 16:39:04 +0800 Subject: ALSA: hda/realtek: Enable headset MIC of Acer Aspire Z24-890 with ALC286 The Acer Aspire Z24-890 cannot detect the headset MIC until ALC286_FIXUP_ACER_AIO_HEADSET_MIC quirk applied. Signed-off-by: Jian-Hong Pan Signed-off-by: Daniel Drake Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 1 + 1 file changed, 1 insertion(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 191830d4fa40..916bb5a3f324 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6715,6 +6715,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), + SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC), SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z), SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS), -- cgit From c7531e31c8a440b5fe6bd62664def5bcb6262f96 Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Thu, 21 Mar 2019 17:17:31 +0800 Subject: ALSA: hda/realtek - Add support for Acer Aspire E5-523G/ES1-432 headset mic The Acer laptop Aspire E5-523G and ES1-432 with ALC255 can't detect the headset microphone until ALC255_FIXUP_ACER_MIC_NO_PRESENCE quirk applied. Signed-off-by: Chris Chiu Signed-off-by: Daniel Drake Signed-off-by: Jian-Hong Pan Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 916bb5a3f324..6042ddf2b2ae 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -6712,6 +6712,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS), SND_PCI_QUIRK(0x1025, 0x102b, "Acer Aspire C24-860", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x106d, "Acer Cloudbook 14", ALC283_FIXUP_CHROME_BOOK), + SND_PCI_QUIRK(0x1025, 0x1099, "Acer Aspire E5-523G", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1025, 0x110e, "Acer Aspire ES1-432", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), -- cgit From 657ee5531903339b06697581532ed32d4762526e Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 11 Mar 2019 22:30:50 -0700 Subject: perf evlist: Introduce side band thread This patch introduces side band thread that captures extended information for events like PERF_RECORD_BPF_EVENT. This new thread uses its own evlist that uses ring buffer with very low watermark for lower latency. To use side band thread, we need to: 1. add side band event(s) by calling perf_evlist__add_sb_event(); 2. calls perf_evlist__start_sb_thread(); 3. at the end of perf run, perf_evlist__stop_sb_thread(). In the next patch, we use this thread to handle PERF_RECORD_BPF_EVENT. Committer notes: Add fix by Jiri Olsa for when te sb_tread can't get started and then at the end the stop_sb_thread() segfaults when joining the (non-existing) thread. That can happen when running 'perf top' or 'perf record' as a normal user, for instance. Further checks need to be done on top of this to more graciously handle these possible failure scenarios. Signed-off-by: Song Liu Reviewed-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stanislav Fomichev Link: http://lkml.kernel.org/r/20190312053051.2690567-15-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 9 ++++ tools/perf/builtin-top.c | 9 ++++ tools/perf/util/evlist.c | 119 ++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/evlist.h | 12 +++++ tools/perf/util/evsel.h | 6 +++ 5 files changed, 155 insertions(+) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index e79faccd7842..6f645fd72fed 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1137,6 +1137,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) struct perf_data *data = &rec->data; struct perf_session *session; bool disabled = false, draining = false; + struct perf_evlist *sb_evlist = NULL; int fd; atexit(record__sig_exit); @@ -1237,6 +1238,11 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) goto out_child; } + if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) { + pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); + opts->no_bpf_event = true; + } + err = record__synthesize(rec, false); if (err < 0) goto out_child; @@ -1487,6 +1493,9 @@ out_child: out_delete_session: perf_session__delete(session); + + if (!opts->no_bpf_event) + perf_evlist__stop_sb_thread(sb_evlist); return status; } diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index c2ea22c4ea67..3ce8a8db6c1d 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1501,6 +1501,7 @@ int cmd_top(int argc, const char **argv) "number of thread to run event synthesize"), OPT_END() }; + struct perf_evlist *sb_evlist = NULL; const char * const top_usage[] = { "perf top []", NULL @@ -1636,8 +1637,16 @@ int cmd_top(int argc, const char **argv) goto out_delete_evlist; } + if (perf_evlist__start_sb_thread(sb_evlist, target)) { + pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); + opts->no_bpf_event = true; + } + status = __cmd_top(&top); + if (!opts->no_bpf_event) + perf_evlist__stop_sb_thread(sb_evlist); + out_delete_evlist: perf_evlist__delete(top.evlist); perf_session__delete(top.session); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index ed20f4379956..ec78e93085de 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -19,6 +19,7 @@ #include "debug.h" #include "units.h" #include "asm/bug.h" +#include "bpf-event.h" #include #include @@ -1856,3 +1857,121 @@ struct perf_evsel *perf_evlist__reset_weak_group(struct perf_evlist *evsel_list, } return leader; } + +int perf_evlist__add_sb_event(struct perf_evlist **evlist, + struct perf_event_attr *attr, + perf_evsel__sb_cb_t cb, + void *data) +{ + struct perf_evsel *evsel; + bool new_evlist = (*evlist) == NULL; + + if (*evlist == NULL) + *evlist = perf_evlist__new(); + if (*evlist == NULL) + return -1; + + if (!attr->sample_id_all) { + pr_warning("enabling sample_id_all for all side band events\n"); + attr->sample_id_all = 1; + } + + evsel = perf_evsel__new_idx(attr, (*evlist)->nr_entries); + if (!evsel) + goto out_err; + + evsel->side_band.cb = cb; + evsel->side_band.data = data; + perf_evlist__add(*evlist, evsel); + return 0; + +out_err: + if (new_evlist) { + perf_evlist__delete(*evlist); + *evlist = NULL; + } + return -1; +} + +static void *perf_evlist__poll_thread(void *arg) +{ + struct perf_evlist *evlist = arg; + bool draining = false; + int i; + + while (draining || !(evlist->thread.done)) { + if (draining) + draining = false; + else if (evlist->thread.done) + draining = true; + + if (!draining) + perf_evlist__poll(evlist, 1000); + + for (i = 0; i < evlist->nr_mmaps; i++) { + struct perf_mmap *map = &evlist->mmap[i]; + union perf_event *event; + + if (perf_mmap__read_init(map)) + continue; + while ((event = perf_mmap__read_event(map)) != NULL) { + struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event); + + if (evsel && evsel->side_band.cb) + evsel->side_band.cb(event, evsel->side_band.data); + else + pr_warning("cannot locate proper evsel for the side band event\n"); + + perf_mmap__consume(map); + } + perf_mmap__read_done(map); + } + } + return NULL; +} + +int perf_evlist__start_sb_thread(struct perf_evlist *evlist, + struct target *target) +{ + struct perf_evsel *counter; + + if (!evlist) + return 0; + + if (perf_evlist__create_maps(evlist, target)) + goto out_delete_evlist; + + evlist__for_each_entry(evlist, counter) { + if (perf_evsel__open(counter, evlist->cpus, + evlist->threads) < 0) + goto out_delete_evlist; + } + + if (perf_evlist__mmap(evlist, UINT_MAX)) + goto out_delete_evlist; + + evlist__for_each_entry(evlist, counter) { + if (perf_evsel__enable(counter)) + goto out_delete_evlist; + } + + evlist->thread.done = 0; + if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist)) + goto out_delete_evlist; + + return 0; + +out_delete_evlist: + perf_evlist__delete(evlist); + evlist = NULL; + return -1; +} + +void perf_evlist__stop_sb_thread(struct perf_evlist *evlist) +{ + if (!evlist) + return; + evlist->thread.done = 1; + pthread_join(evlist->thread.th, NULL); + perf_evlist__delete(evlist); +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 744906dd4887..dcb68f34d2cd 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -54,6 +54,10 @@ struct perf_evlist { struct perf_sample *sample); u64 first_sample_time; u64 last_sample_time; + struct { + pthread_t th; + volatile int done; + } thread; }; struct perf_evsel_str_handler { @@ -87,6 +91,14 @@ int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, int perf_evlist__add_dummy(struct perf_evlist *evlist); +int perf_evlist__add_sb_event(struct perf_evlist **evlist, + struct perf_event_attr *attr, + perf_evsel__sb_cb_t cb, + void *data); +int perf_evlist__start_sb_thread(struct perf_evlist *evlist, + struct target *target); +void perf_evlist__stop_sb_thread(struct perf_evlist *evlist); + int perf_evlist__add_newtp(struct perf_evlist *evlist, const char *sys, const char *name, void *handler); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index cc578e02e08f..0f2c6c93d721 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -73,6 +73,8 @@ struct perf_evsel_config_term { struct perf_stat_evsel; +typedef int (perf_evsel__sb_cb_t)(union perf_event *event, void *data); + /** struct perf_evsel - event selector * * @evlist - evlist this evsel is in, if it is in one. @@ -151,6 +153,10 @@ struct perf_evsel { bool collect_stat; bool weak_group; const char *pmu_name; + struct { + perf_evsel__sb_cb_t *cb; + void *data; + } side_band; }; union u64_swap { -- cgit From d56354dc49091e33d9ffca732ac913ed2df70537 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 11 Mar 2019 22:30:51 -0700 Subject: perf tools: Save bpf_prog_info and BTF of new BPF programs To fully annotate BPF programs with source code mapping, 4 different information are needed: 1) PERF_RECORD_KSYMBOL 2) PERF_RECORD_BPF_EVENT 3) bpf_prog_info 4) btf This patch handles 3) and 4) for BPF programs loaded after 'perf record|top'. For timely process of these information, a dedicated event is added to the side band evlist. When PERF_RECORD_BPF_EVENT is received via the side band event, the polling thread gathers 3) and 4) vis sys_bpf and store them in perf_env. This information is saved to perf.data at the end of 'perf record'. Committer testing: The 'wakeup_watermark' member in 'struct perf_event_attr' is inside a unnamed union, so can't be used in a struct designated initialization with older gccs, get it out of that, isolating as 'attr.wakeup_watermark = 1;' to work with all gcc versions. We also need to add '--no-bpf-event' to the 'perf record' perf_event_attr tests in 'perf test', as the way that that test goes is to intercept the events being setup and looking if they match the fields described in the control files, since now it finds first the side band event used to catch the PERF_RECORD_BPF_EVENT, they all fail. With these issues fixed: Same scenario as for testing BPF programs loaded before 'perf record' or 'perf top' starts, only start the BPF programs after 'perf record|top', so that its information get collected by the sideband threads, the rest works as for the programs loaded before start monitoring. Add missing 'inline' to the bpf_event__add_sb_event() when HAVE_LIBBPF_SUPPORT is not defined, fixing the build in systems without binutils devel files installed. Signed-off-by: Song Liu Reviewed-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stanislav Fomichev Link: http://lkml.kernel.org/r/20190312053051.2690567-16-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 3 + tools/perf/builtin-top.c | 3 + tools/perf/tests/attr/test-record-C0 | 2 +- tools/perf/tests/attr/test-record-basic | 2 +- tools/perf/tests/attr/test-record-branch-any | 2 +- .../perf/tests/attr/test-record-branch-filter-any | 2 +- .../tests/attr/test-record-branch-filter-any_call | 2 +- .../tests/attr/test-record-branch-filter-any_ret | 2 +- tools/perf/tests/attr/test-record-branch-filter-hv | 2 +- .../tests/attr/test-record-branch-filter-ind_call | 2 +- tools/perf/tests/attr/test-record-branch-filter-k | 2 +- tools/perf/tests/attr/test-record-branch-filter-u | 2 +- tools/perf/tests/attr/test-record-count | 2 +- tools/perf/tests/attr/test-record-data | 2 +- tools/perf/tests/attr/test-record-freq | 2 +- tools/perf/tests/attr/test-record-graph-default | 2 +- tools/perf/tests/attr/test-record-graph-dwarf | 2 +- tools/perf/tests/attr/test-record-graph-fp | 2 +- tools/perf/tests/attr/test-record-group | 2 +- tools/perf/tests/attr/test-record-group-sampling | 2 +- tools/perf/tests/attr/test-record-group1 | 2 +- tools/perf/tests/attr/test-record-no-buffering | 2 +- tools/perf/tests/attr/test-record-no-inherit | 2 +- tools/perf/tests/attr/test-record-no-samples | 2 +- tools/perf/tests/attr/test-record-period | 2 +- tools/perf/tests/attr/test-record-raw | 2 +- tools/perf/util/bpf-event.c | 100 +++++++++++++++++++++ tools/perf/util/bpf-event.h | 15 ++++ 28 files changed, 145 insertions(+), 24 deletions(-) diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 6f645fd72fed..4e2d953d4bc5 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1238,6 +1238,9 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) goto out_child; } + if (!opts->no_bpf_event) + bpf_event__add_sb_event(&sb_evlist, &session->header.env); + if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) { pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); opts->no_bpf_event = true; diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 3ce8a8db6c1d..1999d6533d12 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1637,6 +1637,9 @@ int cmd_top(int argc, const char **argv) goto out_delete_evlist; } + if (!top.record_opts.no_bpf_event) + bpf_event__add_sb_event(&sb_evlist, &perf_env); + if (perf_evlist__start_sb_thread(sb_evlist, target)) { pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n"); opts->no_bpf_event = true; diff --git a/tools/perf/tests/attr/test-record-C0 b/tools/perf/tests/attr/test-record-C0 index cb0a3138fa54..93818054ae20 100644 --- a/tools/perf/tests/attr/test-record-C0 +++ b/tools/perf/tests/attr/test-record-C0 @@ -1,6 +1,6 @@ [config] command = record -args = -C 0 kill >/dev/null 2>&1 +args = --no-bpf-event -C 0 kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-basic b/tools/perf/tests/attr/test-record-basic index 85a23cf35ba1..b0ca42a5ecc9 100644 --- a/tools/perf/tests/attr/test-record-basic +++ b/tools/perf/tests/attr/test-record-basic @@ -1,6 +1,6 @@ [config] command = record -args = kill >/dev/null 2>&1 +args = --no-bpf-event kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-branch-any b/tools/perf/tests/attr/test-record-branch-any index 81f839e2fad0..1a99b3ce6b89 100644 --- a/tools/perf/tests/attr/test-record-branch-any +++ b/tools/perf/tests/attr/test-record-branch-any @@ -1,6 +1,6 @@ [config] command = record -args = -b kill >/dev/null 2>&1 +args = --no-bpf-event -b kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-branch-filter-any b/tools/perf/tests/attr/test-record-branch-filter-any index 357421f4dfce..709768b508c6 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-any +++ b/tools/perf/tests/attr/test-record-branch-filter-any @@ -1,6 +1,6 @@ [config] command = record -args = -j any kill >/dev/null 2>&1 +args = --no-bpf-event -j any kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-branch-filter-any_call b/tools/perf/tests/attr/test-record-branch-filter-any_call index dbc55f2ab845..f943221f7825 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-any_call +++ b/tools/perf/tests/attr/test-record-branch-filter-any_call @@ -1,6 +1,6 @@ [config] command = record -args = -j any_call kill >/dev/null 2>&1 +args = --no-bpf-event -j any_call kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-branch-filter-any_ret b/tools/perf/tests/attr/test-record-branch-filter-any_ret index a0824ff8e131..fd4f5b4154a9 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-any_ret +++ b/tools/perf/tests/attr/test-record-branch-filter-any_ret @@ -1,6 +1,6 @@ [config] command = record -args = -j any_ret kill >/dev/null 2>&1 +args = --no-bpf-event -j any_ret kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-branch-filter-hv b/tools/perf/tests/attr/test-record-branch-filter-hv index f34d6f120181..4e52d685ebe1 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-hv +++ b/tools/perf/tests/attr/test-record-branch-filter-hv @@ -1,6 +1,6 @@ [config] command = record -args = -j hv kill >/dev/null 2>&1 +args = --no-bpf-event -j hv kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-branch-filter-ind_call b/tools/perf/tests/attr/test-record-branch-filter-ind_call index b86a35232248..e08c6ab3796e 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-ind_call +++ b/tools/perf/tests/attr/test-record-branch-filter-ind_call @@ -1,6 +1,6 @@ [config] command = record -args = -j ind_call kill >/dev/null 2>&1 +args = --no-bpf-event -j ind_call kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-branch-filter-k b/tools/perf/tests/attr/test-record-branch-filter-k index d3fbc5e1858a..b4b98f84fc2f 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-k +++ b/tools/perf/tests/attr/test-record-branch-filter-k @@ -1,6 +1,6 @@ [config] command = record -args = -j k kill >/dev/null 2>&1 +args = --no-bpf-event -j k kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-branch-filter-u b/tools/perf/tests/attr/test-record-branch-filter-u index a318f0dda173..fb9610edbb0d 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-u +++ b/tools/perf/tests/attr/test-record-branch-filter-u @@ -1,6 +1,6 @@ [config] command = record -args = -j u kill >/dev/null 2>&1 +args = --no-bpf-event -j u kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-count b/tools/perf/tests/attr/test-record-count index 34f6cc577263..5e9b9019d786 100644 --- a/tools/perf/tests/attr/test-record-count +++ b/tools/perf/tests/attr/test-record-count @@ -1,6 +1,6 @@ [config] command = record -args = -c 123 kill >/dev/null 2>&1 +args = --no-bpf-event -c 123 kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-data b/tools/perf/tests/attr/test-record-data index a9cf2233b0ce..a99bb13149c2 100644 --- a/tools/perf/tests/attr/test-record-data +++ b/tools/perf/tests/attr/test-record-data @@ -1,6 +1,6 @@ [config] command = record -args = -d kill >/dev/null 2>&1 +args = --no-bpf-event -d kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-freq b/tools/perf/tests/attr/test-record-freq index bf4cb459f0d5..89e29f6b2ae0 100644 --- a/tools/perf/tests/attr/test-record-freq +++ b/tools/perf/tests/attr/test-record-freq @@ -1,6 +1,6 @@ [config] command = record -args = -F 100 kill >/dev/null 2>&1 +args = --no-bpf-event -F 100 kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-graph-default b/tools/perf/tests/attr/test-record-graph-default index 0b216e69760c..5d8234d50845 100644 --- a/tools/perf/tests/attr/test-record-graph-default +++ b/tools/perf/tests/attr/test-record-graph-default @@ -1,6 +1,6 @@ [config] command = record -args = -g kill >/dev/null 2>&1 +args = --no-bpf-event -g kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-graph-dwarf b/tools/perf/tests/attr/test-record-graph-dwarf index da2fa73bd0a2..ae92061d611d 100644 --- a/tools/perf/tests/attr/test-record-graph-dwarf +++ b/tools/perf/tests/attr/test-record-graph-dwarf @@ -1,6 +1,6 @@ [config] command = record -args = --call-graph dwarf -- kill >/dev/null 2>&1 +args = --no-bpf-event --call-graph dwarf -- kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-graph-fp b/tools/perf/tests/attr/test-record-graph-fp index 625d190bb798..5630521c0b0f 100644 --- a/tools/perf/tests/attr/test-record-graph-fp +++ b/tools/perf/tests/attr/test-record-graph-fp @@ -1,6 +1,6 @@ [config] command = record -args = --call-graph fp kill >/dev/null 2>&1 +args = --no-bpf-event --call-graph fp kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-group b/tools/perf/tests/attr/test-record-group index 618ba1c17474..14ee60fd3f41 100644 --- a/tools/perf/tests/attr/test-record-group +++ b/tools/perf/tests/attr/test-record-group @@ -1,6 +1,6 @@ [config] command = record -args = --group -e cycles,instructions kill >/dev/null 2>&1 +args = --no-bpf-event --group -e cycles,instructions kill >/dev/null 2>&1 ret = 1 [event-1:base-record] diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/attr/test-record-group-sampling index f0729c454f16..300b9f7e6d69 100644 --- a/tools/perf/tests/attr/test-record-group-sampling +++ b/tools/perf/tests/attr/test-record-group-sampling @@ -1,6 +1,6 @@ [config] command = record -args = -e '{cycles,cache-misses}:S' kill >/dev/null 2>&1 +args = --no-bpf-event -e '{cycles,cache-misses}:S' kill >/dev/null 2>&1 ret = 1 [event-1:base-record] diff --git a/tools/perf/tests/attr/test-record-group1 b/tools/perf/tests/attr/test-record-group1 index 48e8bd12fe46..3ffe246e0228 100644 --- a/tools/perf/tests/attr/test-record-group1 +++ b/tools/perf/tests/attr/test-record-group1 @@ -1,6 +1,6 @@ [config] command = record -args = -e '{cycles,instructions}' kill >/dev/null 2>&1 +args = --no-bpf-event -e '{cycles,instructions}' kill >/dev/null 2>&1 ret = 1 [event-1:base-record] diff --git a/tools/perf/tests/attr/test-record-no-buffering b/tools/perf/tests/attr/test-record-no-buffering index aa3956d8fe20..583dcbb078ba 100644 --- a/tools/perf/tests/attr/test-record-no-buffering +++ b/tools/perf/tests/attr/test-record-no-buffering @@ -1,6 +1,6 @@ [config] command = record -args = --no-buffering kill >/dev/null 2>&1 +args = --no-bpf-event --no-buffering kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-no-inherit b/tools/perf/tests/attr/test-record-no-inherit index 560943decb87..15d1dc162e1c 100644 --- a/tools/perf/tests/attr/test-record-no-inherit +++ b/tools/perf/tests/attr/test-record-no-inherit @@ -1,6 +1,6 @@ [config] command = record -args = -i kill >/dev/null 2>&1 +args = --no-bpf-event -i kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-no-samples b/tools/perf/tests/attr/test-record-no-samples index 8eb73ab639e0..596fbd6d5a2c 100644 --- a/tools/perf/tests/attr/test-record-no-samples +++ b/tools/perf/tests/attr/test-record-no-samples @@ -1,6 +1,6 @@ [config] command = record -args = -n kill >/dev/null 2>&1 +args = --no-bpf-event -n kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-period b/tools/perf/tests/attr/test-record-period index 69bc748f0f27..119101154c5e 100644 --- a/tools/perf/tests/attr/test-record-period +++ b/tools/perf/tests/attr/test-record-period @@ -1,6 +1,6 @@ [config] command = record -args = -c 100 -P kill >/dev/null 2>&1 +args = --no-bpf-event -c 100 -P kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/tests/attr/test-record-raw b/tools/perf/tests/attr/test-record-raw index a188a614a44c..13a5f7860c78 100644 --- a/tools/perf/tests/attr/test-record-raw +++ b/tools/perf/tests/attr/test-record-raw @@ -1,6 +1,6 @@ [config] command = record -args = -R kill >/dev/null 2>&1 +args = --no-bpf-event -R kill >/dev/null 2>&1 ret = 1 [event:base-record] diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 7ffe7db59828..2a8c245ca942 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -13,6 +13,7 @@ #include "env.h" #include "session.h" #include "map.h" +#include "evlist.h" #define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) @@ -330,3 +331,102 @@ int perf_event__synthesize_bpf_events(struct perf_session *session, free(event); return err; } + +static void perf_env__add_bpf_info(struct perf_env *env, u32 id) +{ + struct bpf_prog_info_linear *info_linear; + struct bpf_prog_info_node *info_node; + struct btf *btf = NULL; + u64 arrays; + u32 btf_id; + int fd; + + fd = bpf_prog_get_fd_by_id(id); + if (fd < 0) + return; + + arrays = 1UL << BPF_PROG_INFO_JITED_KSYMS; + arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; + arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; + arrays |= 1UL << BPF_PROG_INFO_PROG_TAGS; + arrays |= 1UL << BPF_PROG_INFO_JITED_INSNS; + arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; + arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; + + info_linear = bpf_program__get_prog_info_linear(fd, arrays); + if (IS_ERR_OR_NULL(info_linear)) { + pr_debug("%s: failed to get BPF program info. aborting\n", __func__); + goto out; + } + + btf_id = info_linear->info.btf_id; + + info_node = malloc(sizeof(struct bpf_prog_info_node)); + if (info_node) { + info_node->info_linear = info_linear; + perf_env__insert_bpf_prog_info(env, info_node); + } else + free(info_linear); + + if (btf_id == 0) + goto out; + + if (btf__get_from_id(btf_id, &btf)) { + pr_debug("%s: failed to get BTF of id %u, aborting\n", + __func__, btf_id); + goto out; + } + perf_env__fetch_btf(env, btf_id, btf); + +out: + free(btf); + close(fd); +} + +static int bpf_event__sb_cb(union perf_event *event, void *data) +{ + struct perf_env *env = data; + + if (event->header.type != PERF_RECORD_BPF_EVENT) + return -1; + + switch (event->bpf_event.type) { + case PERF_BPF_EVENT_PROG_LOAD: + perf_env__add_bpf_info(env, event->bpf_event.id); + + case PERF_BPF_EVENT_PROG_UNLOAD: + /* + * Do not free bpf_prog_info and btf of the program here, + * as annotation still need them. They will be freed at + * the end of the session. + */ + break; + default: + pr_debug("unexpected bpf_event type of %d\n", + event->bpf_event.type); + break; + } + + return 0; +} + +int bpf_event__add_sb_event(struct perf_evlist **evlist, + struct perf_env *env) +{ + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_DUMMY, + .sample_id_all = 1, + .watermark = 1, + .bpf_event = 1, + .size = sizeof(attr), /* to capture ABI version */ + }; + + /* + * Older gcc versions don't support designated initializers, like above, + * for unnamed union members, such as the following: + */ + attr.wakeup_watermark = 1; + + return perf_evlist__add_sb_event(evlist, &attr, bpf_event__sb_cb, env); +} diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index b9ec394dc7c7..8cb1189149ec 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -4,12 +4,17 @@ #include #include +#include +#include #include "event.h" struct machine; union perf_event; +struct perf_env; struct perf_sample; struct record_opts; +struct evlist; +struct target; struct bpf_prog_info_node { struct bpf_prog_info_linear *info_linear; @@ -31,6 +36,9 @@ int perf_event__synthesize_bpf_events(struct perf_session *session, perf_event__handler_t process, struct machine *machine, struct record_opts *opts); +int bpf_event__add_sb_event(struct perf_evlist **evlist, + struct perf_env *env); + #else static inline int machine__process_bpf_event(struct machine *machine __maybe_unused, union perf_event *event __maybe_unused, @@ -46,5 +54,12 @@ static inline int perf_event__synthesize_bpf_events(struct perf_session *session { return 0; } + +static inline int bpf_event__add_sb_event(struct perf_evlist **evlist __maybe_unused, + struct perf_env *env __maybe_unused) +{ + return 0; +} + #endif // HAVE_LIBBPF_SUPPORT #endif -- cgit From fc462ac75b36daaa61e9bda7fba66ed1b3a500b4 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 19 Mar 2019 09:54:53 -0700 Subject: perf bpf: Extract logic to create program names from perf_event__synthesize_one_bpf_prog() Extract logic to create program names to synthesize_bpf_prog_name(), so that it can be reused in header.c:print_bpf_prog_info(). This commit doesn't change the behavior. Signed-off-by: Song Liu Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stanislav Fomichev Link: http://lkml.kernel.org/r/20190319165454.1298742-2-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-event.c | 62 +++++++++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 27 deletions(-) diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 2a8c245ca942..d5b041649f26 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -111,6 +111,38 @@ static int perf_env__fetch_btf(struct perf_env *env, return 0; } +static int synthesize_bpf_prog_name(char *buf, int size, + struct bpf_prog_info *info, + struct btf *btf, + u32 sub_id) +{ + u8 (*prog_tags)[BPF_TAG_SIZE] = (void *)(uintptr_t)(info->prog_tags); + void *func_infos = (void *)(uintptr_t)(info->func_info); + u32 sub_prog_cnt = info->nr_jited_ksyms; + const struct bpf_func_info *finfo; + const char *short_name = NULL; + const struct btf_type *t; + int name_len; + + name_len = snprintf(buf, size, "bpf_prog_"); + name_len += snprintf_hex(buf + name_len, size - name_len, + prog_tags[sub_id], BPF_TAG_SIZE); + if (btf) { + finfo = func_infos + sub_id * info->func_info_rec_size; + t = btf__type_by_id(btf, finfo->type_id); + short_name = btf__name_by_offset(btf, t->name_off); + } else if (sub_id == 0 && sub_prog_cnt == 1) { + /* no subprog */ + if (info->name[0]) + short_name = info->name; + } else + short_name = "F"; + if (short_name) + name_len += snprintf(buf + name_len, size - name_len, + "_%s", short_name); + return name_len; +} + /* * Synthesize PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT for one bpf * program. One PERF_RECORD_BPF_EVENT is generated for the program. And @@ -135,7 +167,6 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, struct bpf_prog_info_node *info_node; struct bpf_prog_info *info; struct btf *btf = NULL; - bool has_btf = false; struct perf_env *env; u32 sub_prog_cnt, i; int err = 0; @@ -189,19 +220,13 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, btf = NULL; goto out; } - has_btf = true; perf_env__fetch_btf(env, info->btf_id, btf); } /* Synthesize PERF_RECORD_KSYMBOL */ for (i = 0; i < sub_prog_cnt; i++) { - u8 (*prog_tags)[BPF_TAG_SIZE] = (void *)(uintptr_t)(info->prog_tags); - __u32 *prog_lens = (__u32 *)(uintptr_t)(info->jited_func_lens); + __u32 *prog_lens = (__u32 *)(uintptr_t)(info->jited_func_lens); __u64 *prog_addrs = (__u64 *)(uintptr_t)(info->jited_ksyms); - void *func_infos = (void *)(uintptr_t)(info->func_info); - const struct bpf_func_info *finfo; - const char *short_name = NULL; - const struct btf_type *t; int name_len; *ksymbol_event = (struct ksymbol_event){ @@ -214,26 +239,9 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session, .ksym_type = PERF_RECORD_KSYMBOL_TYPE_BPF, .flags = 0, }; - name_len = snprintf(ksymbol_event->name, KSYM_NAME_LEN, - "bpf_prog_"); - name_len += snprintf_hex(ksymbol_event->name + name_len, - KSYM_NAME_LEN - name_len, - prog_tags[i], BPF_TAG_SIZE); - if (has_btf) { - finfo = func_infos + i * info->func_info_rec_size; - t = btf__type_by_id(btf, finfo->type_id); - short_name = btf__name_by_offset(btf, t->name_off); - } else if (i == 0 && sub_prog_cnt == 1) { - /* no subprog */ - if (info->name[0]) - short_name = info->name; - } else - short_name = "F"; - if (short_name) - name_len += snprintf(ksymbol_event->name + name_len, - KSYM_NAME_LEN - name_len, - "_%s", short_name); + name_len = synthesize_bpf_prog_name(ksymbol_event->name, + KSYM_NAME_LEN, info, btf, i); ksymbol_event->header.size += PERF_ALIGN(name_len + 1, sizeof(u64)); -- cgit From f8dfeae009effc0b6dac2741cf8d7cbb91edb982 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 19 Mar 2019 09:54:54 -0700 Subject: perf bpf: Show more BPF program info in print_bpf_prog_info() This patch enables showing bpf program name, address, and size in the header. Before the patch: perf report --header-only ... # bpf_prog_info of id 9 # bpf_prog_info of id 10 # bpf_prog_info of id 13 After the patch: # bpf_prog_info 9: bpf_prog_7be49e3934a125ba addr 0xffffffffa0024947 size 229 # bpf_prog_info 10: bpf_prog_2a142ef67aaad174 addr 0xffffffffa007c94d size 229 # bpf_prog_info 13: bpf_prog_47368425825d7384_task__task_newt addr 0xffffffffa0251137 size 369 Committer notes: Fix the fallback definition when HAVE_LIBBPF_SUPPORT is not defined, i.e. add the missing 'static inline' and add the __maybe_unused to the args. Also add stdio.h since we now use FILE * in bpf-event.h. Signed-off-by: Song Liu Tested-by: Arnaldo Carvalho de Melo Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stanislav Fomichev Link: http://lkml.kernel.org/r/20190319165454.1298742-3-songliubraving@fb.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-event.c | 40 ++++++++++++++++++++++++++++++++++++++++ tools/perf/util/bpf-event.h | 11 ++++++++++- tools/perf/util/header.c | 5 +++-- 3 files changed, 53 insertions(+), 3 deletions(-) diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index d5b041649f26..2a4a0da35632 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -438,3 +438,43 @@ int bpf_event__add_sb_event(struct perf_evlist **evlist, return perf_evlist__add_sb_event(evlist, &attr, bpf_event__sb_cb, env); } + +void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, + struct perf_env *env, + FILE *fp) +{ + __u32 *prog_lens = (__u32 *)(uintptr_t)(info->jited_func_lens); + __u64 *prog_addrs = (__u64 *)(uintptr_t)(info->jited_ksyms); + char name[KSYM_NAME_LEN]; + struct btf *btf = NULL; + u32 sub_prog_cnt, i; + + sub_prog_cnt = info->nr_jited_ksyms; + if (sub_prog_cnt != info->nr_prog_tags || + sub_prog_cnt != info->nr_jited_func_lens) + return; + + if (info->btf_id) { + struct btf_node *node; + + node = perf_env__find_btf(env, info->btf_id); + if (node) + btf = btf__new((__u8 *)(node->data), + node->data_size); + } + + if (sub_prog_cnt == 1) { + synthesize_bpf_prog_name(name, KSYM_NAME_LEN, info, btf, 0); + fprintf(fp, "# bpf_prog_info %u: %s addr 0x%llx size %u\n", + info->id, name, prog_addrs[0], prog_lens[0]); + return; + } + + fprintf(fp, "# bpf_prog_info %u:\n", info->id); + for (i = 0; i < sub_prog_cnt; i++) { + synthesize_bpf_prog_name(name, KSYM_NAME_LEN, info, btf, i); + + fprintf(fp, "# \tsub_prog %u: %s addr 0x%llx size %u\n", + i, name, prog_addrs[i], prog_lens[i]); + } +} diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h index 8cb1189149ec..04c33b3bfe28 100644 --- a/tools/perf/util/bpf-event.h +++ b/tools/perf/util/bpf-event.h @@ -7,6 +7,7 @@ #include #include #include "event.h" +#include struct machine; union perf_event; @@ -38,7 +39,9 @@ int perf_event__synthesize_bpf_events(struct perf_session *session, struct record_opts *opts); int bpf_event__add_sb_event(struct perf_evlist **evlist, struct perf_env *env); - +void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info, + struct perf_env *env, + FILE *fp); #else static inline int machine__process_bpf_event(struct machine *machine __maybe_unused, union perf_event *event __maybe_unused, @@ -61,5 +64,11 @@ static inline int bpf_event__add_sb_event(struct perf_evlist **evlist __maybe_un return 0; } +static inline void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info __maybe_unused, + struct perf_env *env __maybe_unused, + FILE *fp __maybe_unused) +{ + +} #endif // HAVE_LIBBPF_SUPPORT #endif diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 01dda2f65d36..b9e693825873 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1468,8 +1468,9 @@ static void print_bpf_prog_info(struct feat_fd *ff, FILE *fp) node = rb_entry(next, struct bpf_prog_info_node, rb_node); next = rb_next(&node->rb_node); - fprintf(fp, "# bpf_prog_info of id %u\n", - node->info_linear->info.id); + + bpf_event__print_bpf_prog_info(&node->info_linear->info, + env, fp); } up_read(&env->bpf_progs.lock); -- cgit From f27b744baaa646a7c0a01443cc0d8b4787cac2f7 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 21 Mar 2019 23:14:48 +0800 Subject: irqchip/irq-mvebu-sei: Make mvebu_sei_ap806_caps static Fix sparse warning: drivers/irqchip/irq-mvebu-sei.c:481:23: warning: symbol 'mvebu_sei_ap806_caps' was not declared. Should it be static? Signed-off-by: YueHaibing Signed-off-by: Thomas Gleixner Cc: Cc: Cc: Cc: Cc: Cc: Link: https://lkml.kernel.org/r/20190321151448.15600-1-yuehaibing@huawei.com --- drivers/irqchip/irq-mvebu-sei.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/irqchip/irq-mvebu-sei.c b/drivers/irqchip/irq-mvebu-sei.c index add4c9c934c8..18832ccc8ff8 100644 --- a/drivers/irqchip/irq-mvebu-sei.c +++ b/drivers/irqchip/irq-mvebu-sei.c @@ -478,7 +478,7 @@ dispose_irq: return ret; } -struct mvebu_sei_caps mvebu_sei_ap806_caps = { +static struct mvebu_sei_caps mvebu_sei_ap806_caps = { .ap_range = { .first = 0, .size = 21, -- cgit From 33872d79f5d1cbedaaab79669cc38f16097a9450 Mon Sep 17 00:00:00 2001 From: Erik Hugne Date: Thu, 21 Mar 2019 09:11:59 +0100 Subject: tipc: fix cancellation of topology subscriptions When cancelling a subscription, we have to clear the cancel bit in the request before iterating over any established subscriptions with memcmp. Otherwise no subscription will ever be found, and it will not be possible to explicitly unsubscribe individual subscriptions. Fixes: 8985ecc7c1e0 ("tipc: simplify endianness handling in topology subscriber") Signed-off-by: Erik Hugne Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/topsrv.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 4a708a4e8583..b45932d78004 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -363,6 +363,7 @@ static int tipc_conn_rcv_sub(struct tipc_topsrv *srv, struct tipc_subscription *sub; if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) { + s->filter &= __constant_ntohl(~TIPC_SUB_CANCEL); tipc_conn_delete_sub(con, s); return 0; } -- cgit From ceabee6c59943bdd5e1da1a6a20dc7ee5f8113a2 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 21 Mar 2019 15:02:50 +0800 Subject: genetlink: Fix a memory leak on error path In genl_register_family(), when idr_alloc() fails, we forget to free the memory we possibly allocate for family->attrbuf. Reported-by: Hulk Robot Fixes: 2ae0f17df1cd ("genetlink: use idr to track families") Signed-off-by: YueHaibing Reviewed-by: Kirill Tkhai Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 25eeb6d2a75a..f0ec068e1d02 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -366,7 +366,7 @@ int genl_register_family(struct genl_family *family) start, end + 1, GFP_KERNEL); if (family->id < 0) { err = family->id; - goto errout_locked; + goto errout_free; } err = genl_validate_assign_mc_groups(family); @@ -385,6 +385,7 @@ int genl_register_family(struct genl_family *family) errout_remove: idr_remove(&genl_fam_idr, family->id); +errout_free: kfree(family->attrbuf); errout_locked: genl_unlock_all(); -- cgit From 0ab925d3690614aa44cd29fb84cdcef03eab97dc Mon Sep 17 00:00:00 2001 From: Nicholas Kazlauskas Date: Thu, 21 Mar 2019 11:53:45 -0400 Subject: drm/amd/display: Only allow VRR when vrefresh is within supported range [Why] Black screens or artifacting can occur when enabling FreeSync outside of the supported range of the monitor. This can happen since the supported range isn't always the min/max vrefresh range available for the monitor. [How] There was previously a fix that prevented this from happening in the low range but it didn't cover the upper range. Expand the condition to include both. Cc: Sun peng Li Cc: Harry Wentland Signed-off-by: Nicholas Kazlauskas Acked-by: Alex Deucher Reviewed-by: Harry Wentland Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index fb27783d7a54..81127f7d6ed1 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -5429,9 +5429,11 @@ static void get_freesync_config_for_crtc( struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(new_con_state->base.connector); struct drm_display_mode *mode = &new_crtc_state->base.mode; + int vrefresh = drm_mode_vrefresh(mode); new_crtc_state->vrr_supported = new_con_state->freesync_capable && - aconnector->min_vfreq <= drm_mode_vrefresh(mode); + vrefresh >= aconnector->min_vfreq && + vrefresh <= aconnector->max_vfreq; if (new_crtc_state->vrr_supported) { new_crtc_state->stream->ignore_msa_timing_param = true; -- cgit From 69903dfae0310afe8a15f5cd4e376ebb7c6da1d2 Mon Sep 17 00:00:00 2001 From: Manasi Navare Date: Tue, 19 Mar 2019 15:18:47 -0700 Subject: drm/i915/icl: Fix the TRANS_DDI_FUNC_CTL2 bitfield macro This patch fixes the PORT_SYNC_MODE_MASTER_SELECT macro to correctly do the left shifting to set the port sync master select correctly. I have tested this fix on ICL. Fixes: 49edbd49786e ("drm/i915/icl: Define TRANS_DDI_FUNC_CTL DSI registers") Cc: Madhav Chauhan Cc: Jani Nikula Cc: # v5.0+ Signed-off-by: Manasi Navare Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20190319221847.21311-1-manasi.d.navare@intel.com (cherry picked from commit 7264aebb81d15aa6bbed650c816bba90f026bc35) Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/i915_reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 638a586469f9..684589acc368 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -9243,7 +9243,7 @@ enum skl_power_gate { #define TRANS_DDI_FUNC_CTL2(tran) _MMIO_TRANS2(tran, \ _TRANS_DDI_FUNC_CTL2_A) #define PORT_SYNC_MODE_ENABLE (1 << 4) -#define PORT_SYNC_MODE_MASTER_SELECT(x) ((x) < 0) +#define PORT_SYNC_MODE_MASTER_SELECT(x) ((x) << 0) #define PORT_SYNC_MODE_MASTER_SELECT_MASK (0x7 << 0) #define PORT_SYNC_MODE_MASTER_SELECT_SHIFT 0 -- cgit From 06acc17a96215a11134114aee26532b12dc8fde1 Mon Sep 17 00:00:00 2001 From: Dan Murphy Date: Wed, 20 Mar 2019 07:36:55 -0500 Subject: net: phy: Add DP83825I to the DP83822 driver Add the DP83825I ethernet PHY to the DP83822 driver. These devices share the same WoL register bits and addresses. The phy_driver init was made into a macro as there may be future devices appended to this driver that will share the register space. http://www.ti.com/lit/gpn/dp83825i Reviewed-by: Florian Fainelli Signed-off-by: Dan Murphy Signed-off-by: David S. Miller --- drivers/net/phy/dp83822.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c index bbd8c22067f3..97d45bd5b38e 100644 --- a/drivers/net/phy/dp83822.c +++ b/drivers/net/phy/dp83822.c @@ -15,6 +15,8 @@ #include #define DP83822_PHY_ID 0x2000a240 +#define DP83825I_PHY_ID 0x2000a150 + #define DP83822_DEVADDR 0x1f #define MII_DP83822_PHYSCR 0x11 @@ -304,26 +306,30 @@ static int dp83822_resume(struct phy_device *phydev) return 0; } +#define DP83822_PHY_DRIVER(_id, _name) \ + { \ + PHY_ID_MATCH_MODEL(_id), \ + .name = (_name), \ + .features = PHY_BASIC_FEATURES, \ + .soft_reset = dp83822_phy_reset, \ + .config_init = dp83822_config_init, \ + .get_wol = dp83822_get_wol, \ + .set_wol = dp83822_set_wol, \ + .ack_interrupt = dp83822_ack_interrupt, \ + .config_intr = dp83822_config_intr, \ + .suspend = dp83822_suspend, \ + .resume = dp83822_resume, \ + } + static struct phy_driver dp83822_driver[] = { - { - .phy_id = DP83822_PHY_ID, - .phy_id_mask = 0xfffffff0, - .name = "TI DP83822", - .features = PHY_BASIC_FEATURES, - .config_init = dp83822_config_init, - .soft_reset = dp83822_phy_reset, - .get_wol = dp83822_get_wol, - .set_wol = dp83822_set_wol, - .ack_interrupt = dp83822_ack_interrupt, - .config_intr = dp83822_config_intr, - .suspend = dp83822_suspend, - .resume = dp83822_resume, - }, + DP83822_PHY_DRIVER(DP83822_PHY_ID, "TI DP83822"), + DP83822_PHY_DRIVER(DP83825I_PHY_ID, "TI DP83825I"), }; module_phy_driver(dp83822_driver); static struct mdio_device_id __maybe_unused dp83822_tbl[] = { { DP83822_PHY_ID, 0xfffffff0 }, + { DP83825I_PHY_ID, 0xfffffff0 }, { }, }; MODULE_DEVICE_TABLE(mdio, dp83822_tbl); -- cgit From cd5afa91f078c0787be0a62b5ef90301c00b0271 Mon Sep 17 00:00:00 2001 From: Harini Katakam Date: Wed, 20 Mar 2019 19:12:22 +0530 Subject: net: macb: Add null check for PCLK and HCLK Both PCLK and HCLK are "required" clocks according to macb devicetree documentation. There is a chance that devm_clk_get doesn't return a negative error but just a NULL clock structure instead. In such a case the driver proceeds as usual and uses pclk value 0 to calculate MDC divisor which is incorrect. Hence fix the same in clock initialization. Signed-off-by: Harini Katakam Signed-off-by: David S. Miller --- drivers/net/ethernet/cadence/macb_main.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index ad099fd01b45..1522aee81884 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -3370,14 +3370,20 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, *hclk = devm_clk_get(&pdev->dev, "hclk"); } - if (IS_ERR(*pclk)) { + if (IS_ERR_OR_NULL(*pclk)) { err = PTR_ERR(*pclk); + if (!err) + err = -ENODEV; + dev_err(&pdev->dev, "failed to get macb_clk (%u)\n", err); return err; } - if (IS_ERR(*hclk)) { + if (IS_ERR_OR_NULL(*hclk)) { err = PTR_ERR(*hclk); + if (!err) + err = -ENODEV; + dev_err(&pdev->dev, "failed to get hclk (%u)\n", err); return err; } -- cgit From 85d0966fa57e0ef2d30d913c98ca93674f7a03c9 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 14:59:59 +0100 Subject: net/sched: prepare TC actions to properly validate the control action - pass a pointer to struct tcf_proto in each actions's init() handler, to allow validating the control action, checking whether the chain exists and (eventually) refcounting it. - remove code that validates the control action after a successful call to the action's init() handler, and replace it with a test that forbids addition of actions having 'goto_chain' and NULL goto_chain pointer at the same time. - add tcf_action_check_ctrlact(), that will validate the control action and eventually allocate the action 'goto_chain' within the init() handler. - add tcf_action_set_ctrlact(), that will assign the control action and swap the current 'goto_chain' pointer with the new given one. This disallows 'goto_chain' on actions that don't initialize it properly in their init() handler, i.e. calling tcf_action_check_ctrlact() after successful IDR reservation and then calling tcf_action_set_ctrlact() to assign 'goto_chain' and 'tcf_action' consistently. By doing this, the kernel does not leak anymore refcounts when a valid 'goto chain' handle is replaced in TC actions, causing kmemleak splats like the following one: # tc chain add dev dd0 chain 42 ingress protocol ip flower \ > ip_proto tcp action drop # tc chain add dev dd0 chain 43 ingress protocol ip flower \ > ip_proto udp action drop # tc filter add dev dd0 ingress matchall \ > action gact goto chain 42 index 66 # tc filter replace dev dd0 ingress matchall \ > action gact goto chain 43 index 66 # echo scan >/sys/kernel/debug/kmemleak <...> unreferenced object 0xffff93c0ee09f000 (size 1024): comm "tc", pid 2565, jiffies 4295339808 (age 65.426s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 00 00 00 00 08 00 06 00 00 00 00 00 00 00 00 00 ................ backtrace: [<000000009b63f92d>] tc_ctl_chain+0x3d2/0x4c0 [<00000000683a8d72>] rtnetlink_rcv_msg+0x263/0x2d0 [<00000000ddd88f8e>] netlink_rcv_skb+0x4a/0x110 [<000000006126a348>] netlink_unicast+0x1a0/0x250 [<00000000b3340877>] netlink_sendmsg+0x2c1/0x3c0 [<00000000a25a2171>] sock_sendmsg+0x36/0x40 [<00000000f19ee1ec>] ___sys_sendmsg+0x280/0x2f0 [<00000000d0422042>] __sys_sendmsg+0x5e/0xa0 [<000000007a6c61f9>] do_syscall_64+0x5b/0x180 [<00000000ccd07542>] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [<0000000013eaa334>] 0xffffffffffffffff Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- include/net/act_api.h | 7 +++- net/sched/act_api.c | 97 ++++++++++++++++++++++++++-------------------- net/sched/act_bpf.c | 2 +- net/sched/act_connmark.c | 1 + net/sched/act_csum.c | 2 +- net/sched/act_gact.c | 2 +- net/sched/act_ife.c | 2 +- net/sched/act_ipt.c | 11 +++--- net/sched/act_mirred.c | 1 + net/sched/act_nat.c | 3 +- net/sched/act_pedit.c | 2 +- net/sched/act_police.c | 1 + net/sched/act_sample.c | 2 +- net/sched/act_simple.c | 2 +- net/sched/act_skbedit.c | 1 + net/sched/act_skbmod.c | 1 + net/sched/act_tunnel_key.c | 1 + net/sched/act_vlan.c | 2 +- 18 files changed, 84 insertions(+), 56 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index c745e9ccfab2..54fbb49bd08a 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -90,7 +90,7 @@ struct tc_action_ops { int (*lookup)(struct net *net, struct tc_action **a, u32 index); int (*init)(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, int ovr, - int bind, bool rtnl_held, + int bind, bool rtnl_held, struct tcf_proto *tp, struct netlink_ext_ack *extack); int (*walk)(struct net *, struct sk_buff *, struct netlink_callback *, int, @@ -181,6 +181,11 @@ int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int); int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int); int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int); +int tcf_action_check_ctrlact(int action, struct tcf_proto *tp, + struct tcf_chain **handle, + struct netlink_ext_ack *newchain); +struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action, + struct tcf_chain *newchain); #endif /* CONFIG_NET_CLS_ACT */ static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes, diff --git a/net/sched/act_api.c b/net/sched/act_api.c index aecf1bf233c8..fe67b98ac641 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -28,23 +28,6 @@ #include #include -static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp) -{ - u32 chain_index = a->tcfa_action & TC_ACT_EXT_VAL_MASK; - - if (!tp) - return -EINVAL; - a->goto_chain = tcf_chain_get_by_act(tp->chain->block, chain_index); - if (!a->goto_chain) - return -ENOMEM; - return 0; -} - -static void tcf_action_goto_chain_fini(struct tc_action *a) -{ - tcf_chain_put_by_act(a->goto_chain); -} - static void tcf_action_goto_chain_exec(const struct tc_action *a, struct tcf_result *res) { @@ -71,6 +54,53 @@ static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie, call_rcu(&old->rcu, tcf_free_cookie_rcu); } +int tcf_action_check_ctrlact(int action, struct tcf_proto *tp, + struct tcf_chain **newchain, + struct netlink_ext_ack *extack) +{ + int opcode = TC_ACT_EXT_OPCODE(action), ret = -EINVAL; + u32 chain_index; + + if (!opcode) + ret = action > TC_ACT_VALUE_MAX ? -EINVAL : 0; + else if (opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC) + ret = 0; + if (ret) { + NL_SET_ERR_MSG(extack, "invalid control action"); + goto end; + } + + if (TC_ACT_EXT_CMP(action, TC_ACT_GOTO_CHAIN)) { + chain_index = action & TC_ACT_EXT_VAL_MASK; + if (!tp || !newchain) { + ret = -EINVAL; + NL_SET_ERR_MSG(extack, + "can't goto NULL proto/chain"); + goto end; + } + *newchain = tcf_chain_get_by_act(tp->chain->block, chain_index); + if (!*newchain) { + ret = -ENOMEM; + NL_SET_ERR_MSG(extack, + "can't allocate goto_chain"); + } + } +end: + return ret; +} +EXPORT_SYMBOL(tcf_action_check_ctrlact); + +struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action, + struct tcf_chain *newchain) +{ + struct tcf_chain *oldchain = a->goto_chain; + + a->tcfa_action = action; + a->goto_chain = newchain; + return oldchain; +} +EXPORT_SYMBOL(tcf_action_set_ctrlact); + /* XXX: For standalone actions, we don't need a RCU grace period either, because * actions are always connected to filters and filters are already destroyed in * RCU callbacks, so after a RCU grace period actions are already disconnected @@ -78,13 +108,15 @@ static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie, */ static void free_tcf(struct tc_action *p) { + struct tcf_chain *chain = p->goto_chain; + free_percpu(p->cpu_bstats); free_percpu(p->cpu_bstats_hw); free_percpu(p->cpu_qstats); tcf_set_action_cookie(&p->act_cookie, NULL); - if (p->goto_chain) - tcf_action_goto_chain_fini(p); + if (chain) + tcf_chain_put_by_act(chain); kfree(p); } @@ -800,15 +832,6 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb) return c; } -static bool tcf_action_valid(int action) -{ - int opcode = TC_ACT_EXT_OPCODE(action); - - if (!opcode) - return action <= TC_ACT_VALUE_MAX; - return opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC; -} - struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, struct nlattr *nla, struct nlattr *est, char *name, int ovr, int bind, @@ -890,10 +913,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, /* backward compatibility for policer */ if (name == NULL) err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind, - rtnl_held, extack); + rtnl_held, tp, extack); else err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held, - extack); + tp, extack); if (err < 0) goto err_mod; @@ -907,18 +930,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, if (err != ACT_P_CREATED) module_put(a_o->owner); - if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) { - err = tcf_action_goto_chain_init(a, tp); - if (err) { - tcf_action_destroy_1(a, bind); - NL_SET_ERR_MSG(extack, "Failed to init TC action chain"); - return ERR_PTR(err); - } - } - - if (!tcf_action_valid(a->tcfa_action)) { + if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) && + !a->goto_chain) { tcf_action_destroy_1(a, bind); - NL_SET_ERR_MSG(extack, "Invalid control action value"); + NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain"); return ERR_PTR(-EINVAL); } diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index aa5c38d11a30..3c0468f2aae6 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -278,7 +278,7 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog, static int tcf_bpf_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **act, int replace, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, bpf_net_id); struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 5d24993cccfe..44aa046a92ea 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -97,6 +97,7 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = { static int tcf_connmark_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, connmark_net_id); diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index c79aca29505e..9ba0f61a1e82 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -46,7 +46,7 @@ static struct tc_action_ops act_csum_ops; static int tcf_csum_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool rtnl_held, + int bind, bool rtnl_held, struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, csum_net_id); diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 93da0004e9f4..b8ad311bd8cc 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -57,7 +57,7 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = { static int tcf_gact_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, gact_net_id); struct nlattr *tb[TCA_GACT_MAX + 1]; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 9b1f2b3990ee..c1ba74d5c1e3 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -469,7 +469,7 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, static int tcf_ife_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, ife_net_id); struct nlattr *tb[TCA_IFE_MAX + 1]; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 98f5b6ea77b4..04a0b5c61194 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -97,7 +97,8 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, struct nlattr *est, struct tc_action **a, - const struct tc_action_ops *ops, int ovr, int bind) + const struct tc_action_ops *ops, int ovr, int bind, + struct tcf_proto *tp) { struct tc_action_net *tn = net_generic(net, id); struct nlattr *tb[TCA_IPT_MAX + 1]; @@ -205,20 +206,20 @@ err1: static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool rtnl_held, + int bind, bool rtnl_held, struct tcf_proto *tp, struct netlink_ext_ack *extack) { return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr, - bind); + bind, tp); } static int tcf_xt_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool unlocked, + int bind, bool unlocked, struct tcf_proto *tp, struct netlink_ext_ack *extack) { return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr, - bind); + bind, tp); } static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a, diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 6692fd054617..383f4024452c 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -94,6 +94,7 @@ static struct tc_action_ops act_mirred_ops; static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, mirred_net_id); diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 543eab9193f1..de4b493e26d2 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -38,7 +38,8 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, - bool rtnl_held, struct netlink_ext_ack *extack) + bool rtnl_held, struct tcf_proto *tp, + struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, nat_net_id); struct nlattr *tb[TCA_NAT_MAX + 1]; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index a80373878df7..8ca82aefa11a 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -138,7 +138,7 @@ nla_failure: static int tcf_pedit_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, pedit_net_id); struct nlattr *tb[TCA_PEDIT_MAX + 1]; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 8271a6263824..229eba7925e5 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -83,6 +83,7 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { static int tcf_police_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { int ret = 0, tcfp_result = TC_ACT_OK, err, size; diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 203e399e5c85..36b8adbe935d 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -37,7 +37,7 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = { static int tcf_sample_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, - int bind, bool rtnl_held, + int bind, bool rtnl_held, struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, sample_net_id); diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index d54cb608dbaf..4916dc3e3668 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -78,7 +78,7 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = { static int tcf_simp_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, simp_net_id); struct nlattr *tb[TCA_DEF_MAX + 1]; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 65879500b688..4566eff3d027 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -96,6 +96,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 7bac1d78e7a3..b9ab2c8f07f1 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -82,6 +82,7 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = { static int tcf_skbmod_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 7c6591b991d5..fc295d91559a 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -210,6 +210,7 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p) static int tunnel_key_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index ac0061599225..4651ee15e35d 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -105,7 +105,7 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = { static int tcf_vlan_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind, bool rtnl_held, - struct netlink_ext_ack *extack) + struct tcf_proto *tp, struct netlink_ext_ack *extack) { struct tc_action_net *tn = net_generic(net, vlan_net_id); struct nlattr *tb[TCA_VLAN_MAX + 1]; -- cgit From 4e1810049c267c203c19130301203d0591174535 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:00 +0100 Subject: net/sched: act_bpf: validate the control action inside init() the following script: # tc filter add dev crash0 egress matchall \ > action bpf bytecode '1,6 0 0 4294967295' pass index 90 # tc actions replace action bpf \ > bytecode '1,6 0 0 4294967295' goto chain 42 index 90 cookie c1a0c1a0 # tc action show action bpf had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: bpf bytecode '1,6 0 0 4294967295' default-action goto chain 42 index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffffb3a0803dfa90 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff942b347ada00 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffffb3a08034d038 RDI: ffff942b347ada00 RBP: ffffb3a0803dfb30 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: ffffb3a0803dfb0c R12: ffff942b3b682b00 R13: ffff942b3b682b08 R14: 0000000000000001 R15: ffff942b3b682f00 FS: 00007f6160a72740(0000) GS:ffff942b3da80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000000795a4002 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ip_finish_output2+0x16f/0x430 ip_finish_output2+0x16f/0x430 ? ip_output+0x69/0xe0 ip_output+0x69/0xe0 ? ip_forward_options+0x1a0/0x1a0 ip_send_skb+0x15/0x40 raw_sendmsg+0x8e1/0xbd0 ? sched_clock+0x5/0x10 ? sched_clock_cpu+0xc/0xa0 ? try_to_wake_up+0x54/0x480 ? ldsem_down_read+0x3f/0x280 ? _cond_resched+0x15/0x40 ? down_read+0xe/0x30 ? copy_termios+0x1e/0x70 ? tty_mode_ioctl+0x1b6/0x4c0 ? sock_sendmsg+0x36/0x40 sock_sendmsg+0x36/0x40 __sys_sendto+0x10e/0x140 ? do_vfs_ioctl+0xa4/0x640 ? handle_mm_fault+0xdc/0x210 ? syscall_trace_enter+0x1df/0x2e0 ? __audit_syscall_exit+0x216/0x260 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x5b/0x180 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f615f7e3c03 Code: 48 8b 0d 90 62 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 9d c3 2c 00 00 75 13 49 89 ca b8 2c 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 34 c3 48 83 ec 08 e8 4b cc 00 00 48 89 04 24 RSP: 002b:00007ffee5d8cc28 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 000055a4f28f1700 RCX: 00007f615f7e3c03 RDX: 0000000000000040 RSI: 000055a4f28f1700 RDI: 0000000000000003 RBP: 00007ffee5d8e340 R08: 000055a4f28ee510 R09: 0000000000000010 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000040 R13: 000055a4f28f16c0 R14: 000055a4f28ef69c R15: 0000000000000080 Modules linked in: act_bpf veth ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 mbcache crct10dif_pclmul jbd2 crc32_pclmul snd_hda_codec_generic ghash_clmulni_intel snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd cryptd glue_helper pcspkr joydev virtio_balloon snd_timer snd i2c_piix4 soundcore nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper virtio_blk virtio_net virtio_console net_failover failover syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm crc32c_intel ata_piix serio_raw libata virtio_pci virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_bpf_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_bpf.c | 23 +++++++++++++++----- .../selftests/tc-testing/tc-tests/actions/bpf.json | 25 ++++++++++++++++++++++ 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 3c0468f2aae6..3841156aa09f 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -282,6 +283,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, { struct tc_action_net *tn = net_generic(net, bpf_net_id); struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tcf_bpf_cfg cfg, old; struct tc_act_bpf *parm; struct tcf_bpf *prog; @@ -323,12 +325,16 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, return ret; } + ret = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (ret < 0) + goto release_idr; + is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS]; is_ebpf = tb[TCA_ACT_BPF_FD]; if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) { ret = -EINVAL; - goto out; + goto put_chain; } memset(&cfg, 0, sizeof(cfg)); @@ -336,7 +342,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) : tcf_bpf_init_from_efd(tb, &cfg); if (ret < 0) - goto out; + goto put_chain; prog = to_bpf(*act); @@ -350,10 +356,13 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (cfg.bpf_num_ops) prog->bpf_num_ops = cfg.bpf_num_ops; - prog->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*act, parm->action, goto_ch); rcu_assign_pointer(prog->filter, cfg.filter); spin_unlock_bh(&prog->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + if (res == ACT_P_CREATED) { tcf_idr_insert(tn, *act); } else { @@ -363,9 +372,13 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, } return res; -out: - tcf_idr_release(*act, bind); +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + +release_idr: + tcf_idr_release(*act, bind); return ret; } diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json index 5970cee6d05f..b074ea9b6fe8 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json @@ -286,5 +286,30 @@ "teardown": [ "$TC action flush action bpf" ] + }, + { + "id": "b8a1", + "name": "Replace bpf action with invalid goto_chain control", + "category": [ + "actions", + "bpf" + ], + "setup": [ + [ + "$TC actions flush action bpf", + 0, + 1, + 255 + ], + "$TC action add action bpf bytecode '1,6 0 0 4294967295' pass index 90" + ], + "cmdUnderTest": "$TC action replace action bpf bytecode '1,6 0 0 4294967295' goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC action list action bpf", + "matchPattern": "action order [0-9]*: bpf.* default-action pass.*index 90", + "matchCount": "1", + "teardown": [ + "$TC action flush action bpf" + ] } ] -- cgit From f5c29d83866d75585f9c89754de0b86b45ceab89 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:01 +0100 Subject: net/sched: act_csum: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall action csum icmp pass index 90 # tc actions replace action csum icmp goto chain 42 index 90 \ > cookie c1a0c1a0 # tc actions show action csum had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: csum (icmp) action goto chain 42 index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 8000000074692067 P4D 8000000074692067 PUD 2e210067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.0.0-rc4.gotochain_crash+ #533 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffff93153da03be0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff9314ee40f700 RCX: 0000000000003a00 RDX: 0000000000000000 RSI: ffff931537c87828 RDI: ffff931537c87818 RBP: ffff93153da03c80 R08: 00000000527cffff R09: 0000000000000003 R10: 000000000000003f R11: 0000000000000028 R12: ffff9314edf68400 R13: ffff9314edf68408 R14: 0000000000000001 R15: ffff9314ed67b600 FS: 0000000000000000(0000) GS:ffff93153da00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000073e32003 CR4: 00000000001606f0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ip6_finish_output2+0x369/0x590 ip6_finish_output2+0x369/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.35+0x79/0xc0 mld_sendpack+0x16f/0x220 mld_ifc_timer_expire+0x195/0x2c0 ? igmp6_timer_handler+0x70/0x70 call_timer_fn+0x2b/0x130 run_timer_softirq+0x3e8/0x440 ? tick_sched_timer+0x37/0x70 __do_softirq+0xe3/0x2f5 irq_exit+0xf0/0x100 smp_apic_timer_interrupt+0x6c/0x130 apic_timer_interrupt+0xf/0x20 RIP: 0010:native_safe_halt+0x2/0x10 Code: 66 ff ff ff 7f f3 c3 65 48 8b 04 25 00 5c 01 00 f0 80 48 02 20 48 8b 00 a8 08 74 8b eb c1 90 90 90 90 90 90 90 90 90 90 fb f4 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00 f4 c3 90 90 90 90 90 90 RSP: 0018:ffffffff9a803e98 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 RAX: ffffffff99e184f0 RBX: 0000000000000000 RCX: 0000000000000001 RDX: 0000000000000001 RSI: 0000000000000087 RDI: 0000000000000000 RBP: 0000000000000000 R08: 000eb5c4572376b3 R09: 0000000000000000 R10: ffffa53e806a3ca0 R11: 00000000000f4240 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 ? __sched_text_end+0x1/0x1 default_idle+0x1c/0x140 do_idle+0x1c4/0x280 cpu_startup_entry+0x19/0x20 start_kernel+0x49e/0x4be secondary_startup_64+0xa4/0xb0 Modules linked in: act_csum veth ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 crct10dif_pclmul crc32_pclmul snd_hda_codec_generic ghash_clmulni_intel snd_hda_intel mbcache snd_hda_codec jbd2 snd_hwdep snd_hda_core snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd cryptd snd_timer glue_helper snd joydev virtio_balloon pcspkr soundcore i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect virtio_net sysimgblt net_failover fb_sys_fops virtio_console virtio_blk ttm failover drm ata_piix crc32c_intel floppy virtio_pci serio_raw libata virtio_ring virtio dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_csum_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_csum.c | 20 ++++++++++++++--- .../tc-testing/tc-tests/actions/csum.json | 25 ++++++++++++++++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 9ba0f61a1e82..0c77e7bdf6d5 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -33,6 +33,7 @@ #include #include +#include #include #include @@ -52,6 +53,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, struct tc_action_net *tn = net_generic(net, csum_net_id); struct tcf_csum_params *params_new; struct nlattr *tb[TCA_CSUM_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_csum *parm; struct tcf_csum *p; int ret = 0, err; @@ -87,21 +89,27 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, return err; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; + p = to_tcf_csum(*a); params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); if (unlikely(!params_new)) { - tcf_idr_release(*a, bind); - return -ENOMEM; + err = -ENOMEM; + goto put_chain; } params_new->update_flags = parm->update_flags; spin_lock_bh(&p->tcf_lock); - p->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(p->params, params_new, lockdep_is_held(&p->tcf_lock)); spin_unlock_bh(&p->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (params_new) kfree_rcu(params_new, rcu); @@ -109,6 +117,12 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } /** diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json index a022792d392a..ddabb2fbb7c7 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json @@ -500,5 +500,30 @@ "matchPattern": "^[ \t]+index [0-9]+ ref", "matchCount": "0", "teardown": [] + }, + { + "id": "d128", + "name": "Replace csum action with invalid goto chain control", + "category": [ + "actions", + "csum" + ], + "setup": [ + [ + "$TC actions flush action csum", + 0, + 1, + 255 + ], + "$TC actions add action csum iph index 90" + ], + "cmdUnderTest": "$TC actions replace action csum iph goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions get action csum index 90", + "matchPattern": "action order [0-9]*: csum \\(iph\\) action pass.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action csum" + ] } ] -- cgit From 0da2dbd6029c2be4191651bafa57c3c006eff63c Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:02 +0100 Subject: net/sched: act_gact: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action gact pass index 90 # tc actions replace action gact \ > goto chain 42 index 90 cookie c1a0c1a0 # tc actions show action gact had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: gact action goto chain 42 random type none pass val 0 index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 2 PID: 0 Comm: swapper/2 Not tainted 5.0.0-rc4.gotochain_crash+ #533 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffff8c2434703be0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff8c23ed6d7e00 RCX: 000000000000005a RDX: 0000000000000000 RSI: 0000000000000001 RDI: ffff8c23ed6d7e00 RBP: ffff8c2434703c80 R08: ffff8c243b639ac8 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8c2429e68b00 R13: ffff8c2429e68b08 R14: 0000000000000001 R15: ffff8c2429c5a480 FS: 0000000000000000(0000) GS:ffff8c2434700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000002dc0e005 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ip6_finish_output2+0x369/0x590 ip6_finish_output2+0x369/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.35+0x79/0xc0 mld_sendpack+0x16f/0x220 mld_ifc_timer_expire+0x195/0x2c0 ? igmp6_timer_handler+0x70/0x70 call_timer_fn+0x2b/0x130 run_timer_softirq+0x3e8/0x440 ? tick_sched_timer+0x37/0x70 __do_softirq+0xe3/0x2f5 irq_exit+0xf0/0x100 smp_apic_timer_interrupt+0x6c/0x130 apic_timer_interrupt+0xf/0x20 RIP: 0010:native_safe_halt+0x2/0x10 Code: 74 ff ff ff 7f f3 c3 65 48 8b 04 25 00 5c 01 00 f0 80 48 02 20 48 8b 00 a8 08 74 8b eb c1 90 90 90 90 90 90 90 90 90 90 fb f4 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00 f4 c3 90 90 90 90 90 90 RSP: 0018:ffff9c8640387eb8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 RAX: ffffffff8b2184f0 RBX: 0000000000000002 RCX: 0000000000000001 RDX: 0000000000000001 RSI: 0000000000000087 RDI: 0000000000000002 RBP: 0000000000000002 R08: 000eb57882b36cc3 R09: 0000000000000020 R10: 0000000000000004 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 ? __sched_text_end+0x1/0x1 default_idle+0x1c/0x140 do_idle+0x1c4/0x280 cpu_startup_entry+0x19/0x20 start_secondary+0x1a7/0x200 secondary_startup_64+0xa4/0xb0 Modules linked in: act_gact act_bpf veth ip6table_filter ip6_tables iptable_filter binfmt_misc crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec_generic ext4 snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core mbcache jbd2 snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd cryptd glue_helper virtio_balloon joydev pcspkr snd_timer snd i2c_piix4 soundcore nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper syscopyarea virtio_net sysfillrect net_failover virtio_blk sysimgblt fb_sys_fops virtio_console ttm failover drm crc32c_intel serio_raw ata_piix libata floppy virtio_pci virtio_ring virtio dm_mirror dm_region_hash dm_log dm_mod [last unloaded: act_bpf] CR2: 0000000000000000 Validating the control action within tcf_gact_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_gact.c | 13 ++++++++++- .../tc-testing/tc-tests/actions/gact.json | 25 ++++++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index b8ad311bd8cc..e540e31069d7 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -61,6 +62,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, { struct tc_action_net *tn = net_generic(net, gact_net_id); struct nlattr *tb[TCA_GACT_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_gact *parm; struct tcf_gact *gact; int ret = 0; @@ -116,10 +118,13 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, return err; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; gact = to_gact(*a); spin_lock_bh(&gact->tcf_lock); - gact->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); #ifdef CONFIG_GACT_PROB if (p_parm) { gact->tcfg_paction = p_parm->paction; @@ -133,9 +138,15 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, #endif spin_unlock_bh(&gact->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +release_idr: + tcf_idr_release(*a, bind); + return err; } static int tcf_gact_act(struct sk_buff *skb, const struct tc_action *a, diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json index 89189a03ce3d..814b7a8a478b 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json @@ -560,5 +560,30 @@ "teardown": [ "$TC actions flush action gact" ] + }, + { + "id": "ca89", + "name": "Replace gact action with invalid goto chain control", + "category": [ + "actions", + "gact" + ], + "setup": [ + [ + "$TC actions flush action gact", + 0, + 1, + 255 + ], + "$TC actions add action pass random determ drop 2 index 90" + ], + "cmdUnderTest": "$TC actions replace action goto chain 42 random determ drop 5 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions list action gact", + "matchPattern": "action order [0-9]*: gact action pass.*random type determ drop val 2.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action gact" + ] } ] -- cgit From 11a94d7fd80f92325e7b8653290ad3d2cd67f119 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:03 +0100 Subject: net/sched: act_ife: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action ife encode allow mark pass index 90 # tc actions replace action ife \ > encode allow mark goto chain 42 index 90 cookie c1a0c1a0 # tc action show action ife had the following output: IFE type 0xED3E IFE type 0xED3E Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: ife encode action goto chain 42 type 0XED3E allow mark index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 800000007b4e7067 P4D 800000007b4e7067 PUD 7b4e6067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 2 PID: 164 Comm: kworker/2:1 Not tainted 5.0.0-rc4.gotochain_crash+ #533 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Workqueue: ipv6_addrconf addrconf_dad_work RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffffa6a7c0553ad0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff9796ee1bbd00 RCX: 0000000000000001 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 RBP: ffffa6a7c0553b70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: ffff9797385bb038 R12: ffff9796ead9d700 R13: ffff9796ead9d708 R14: 0000000000000001 R15: ffff9796ead9d800 FS: 0000000000000000(0000) GS:ffff97973db00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000007c41e006 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ndisc_next_option+0x50/0x50 ? ___neigh_create+0x4d5/0x680 ? ip6_finish_output2+0x1b5/0x590 ip6_finish_output2+0x1b5/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.28+0x79/0xc0 ndisc_send_skb+0x248/0x2e0 ndisc_send_ns+0xf8/0x200 ? addrconf_dad_work+0x389/0x4b0 addrconf_dad_work+0x389/0x4b0 ? __switch_to_asm+0x34/0x70 ? process_one_work+0x195/0x380 ? addrconf_dad_completed+0x370/0x370 process_one_work+0x195/0x380 worker_thread+0x30/0x390 ? process_one_work+0x380/0x380 kthread+0x113/0x130 ? kthread_park+0x90/0x90 ret_from_fork+0x35/0x40 Modules linked in: act_gact act_meta_mark act_ife dummy veth ip6table_filter ip6_tables iptable_filter binfmt_misc snd_hda_codec_generic ext4 snd_hda_intel snd_hda_codec crct10dif_pclmul mbcache crc32_pclmul jbd2 snd_hwdep snd_hda_core ghash_clmulni_intel snd_seq snd_seq_device snd_pcm snd_timer aesni_intel crypto_simd snd cryptd glue_helper virtio_balloon joydev pcspkr soundcore i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl virtio_net drm_kms_helper virtio_blk net_failover syscopyarea failover sysfillrect virtio_console sysimgblt fb_sys_fops ttm drm crc32c_intel serio_raw ata_piix virtio_pci virtio_ring libata virtio floppy dm_mirror dm_region_hash dm_log dm_mod [last unloaded: act_ife] CR2: 0000000000000000 Validating the control action within tcf_ife_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_ife.c | 33 +++++++++++++--------- .../selftests/tc-testing/tc-tests/actions/ife.json | 25 ++++++++++++++++ 2 files changed, 45 insertions(+), 13 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index c1ba74d5c1e3..31c6ffb6abe7 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -474,6 +475,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, struct tc_action_net *tn = net_generic(net, ife_net_id); struct nlattr *tb[TCA_IFE_MAX + 1]; struct nlattr *tb2[IFE_META_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tcf_ife_params *p; struct tcf_ife_info *ife; u16 ife_type = ETH_P_IFE; @@ -531,6 +533,10 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, } ife = to_ife(*a); + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; + p->flags = parm->flags; if (parm->flags & IFE_ENCODE) { @@ -563,13 +569,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (tb[TCA_IFE_METALST]) { err = nla_parse_nested(tb2, IFE_META_MAX, tb[TCA_IFE_METALST], NULL, NULL); - if (err) { -metadata_parse_err: - tcf_idr_release(*a, bind); - kfree(p); - return err; - } - + if (err) + goto metadata_parse_err; err = populate_metalist(ife, tb2, exists, rtnl_held); if (err) goto metadata_parse_err; @@ -581,21 +582,20 @@ metadata_parse_err: * going to bail out */ err = use_all_metadata(ife, exists); - if (err) { - tcf_idr_release(*a, bind); - kfree(p); - return err; - } + if (err) + goto metadata_parse_err; } if (exists) spin_lock_bh(&ife->tcf_lock); - ife->tcf_action = parm->action; /* protected by tcf_lock when modifying existing action */ + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(ife->params, p, 1); if (exists) spin_unlock_bh(&ife->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (p) kfree_rcu(p, rcu); @@ -603,6 +603,13 @@ metadata_parse_err: tcf_idr_insert(tn, *a); return ret; +metadata_parse_err: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + kfree(p); + tcf_idr_release(*a, bind); + return err; } static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json index 0da3545cabdb..c13a68b98fc7 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json @@ -1060,5 +1060,30 @@ "matchPattern": "action order [0-9]*: ife encode action pipe.*allow prio.*index 4", "matchCount": "0", "teardown": [] + }, + { + "id": "a0e2", + "name": "Replace ife encode action with invalid goto chain control", + "category": [ + "actions", + "ife" + ], + "setup": [ + [ + "$TC actions flush action ife", + 0, + 1, + 255 + ], + "$TC actions add action ife encode allow mark pass index 90" + ], + "cmdUnderTest": "$TC actions replace action ife encode allow mark goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions get action ife index 90", + "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E .*allow mark.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action ife" + ] } ] -- cgit From ff9721d32b1aba8bf46a06df20827d0a5d52ec48 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:04 +0100 Subject: net/sched: act_mirred: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action mirred ingress mirror dev lo pass # tc actions replace action mirred \ > ingress mirror dev lo goto chain 42 index 90 cookie c1a0c1a0 # tc actions show action mirred had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: mirred (Ingress Mirror to device lo) goto chain 42 index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: Mirror/redirect action on BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 3 PID: 47 Comm: kworker/3:1 Not tainted 5.0.0-rc4.gotochain_crash+ #533 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Workqueue: ipv6_addrconf addrconf_dad_work RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffffa772404b7ad0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff9c5afc3f4300 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff9c5afdba9380 RDI: 0000000000029380 RBP: ffffa772404b7b70 R08: ffff9c5af7010028 R09: ffff9c5af7010029 R10: 0000000000000000 R11: ffff9c5af94c6a38 R12: ffff9c5af7953000 R13: ffff9c5af7953008 R14: 0000000000000001 R15: ffff9c5af7953d00 FS: 0000000000000000(0000) GS:ffff9c5afdb80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000007c514004 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ndisc_next_option+0x50/0x50 ? ___neigh_create+0x4d5/0x680 ? ip6_finish_output2+0x1b5/0x590 ip6_finish_output2+0x1b5/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.28+0x79/0xc0 ndisc_send_skb+0x248/0x2e0 ndisc_send_ns+0xf8/0x200 ? addrconf_dad_work+0x389/0x4b0 addrconf_dad_work+0x389/0x4b0 ? __switch_to_asm+0x34/0x70 ? process_one_work+0x195/0x380 ? addrconf_dad_completed+0x370/0x370 process_one_work+0x195/0x380 worker_thread+0x30/0x390 ? process_one_work+0x380/0x380 kthread+0x113/0x130 ? kthread_park+0x90/0x90 ret_from_fork+0x35/0x40 Modules linked in: act_mirred veth ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 crct10dif_pclmul snd_hda_codec_generic crc32_pclmul snd_hda_intel snd_hda_codec mbcache ghash_clmulni_intel jbd2 snd_hwdep snd_hda_core snd_seq snd_seq_device snd_pcm aesni_intel snd_timer snd crypto_simd cryptd glue_helper soundcore virtio_balloon joydev pcspkr i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops virtio_net ttm virtio_blk net_failover virtio_console failover drm ata_piix crc32c_intel virtio_pci serio_raw libata virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_mirred_init() proved to fix the above issue. For the same reason, postpone the assignment of tcfa_action and tcfm_eaction to avoid partial reconfiguration of a mirred rule when it's replaced by another one that mirrors to a device that does not exist. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_mirred.c | 21 ++++++++++++++---- .../tc-testing/tc-tests/actions/mirred.json | 25 ++++++++++++++++++++++ 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 383f4024452c..cd712e4e8998 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -99,6 +99,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, { struct tc_action_net *tn = net_generic(net, mirred_net_id); struct nlattr *tb[TCA_MIRRED_MAX + 1]; + struct tcf_chain *goto_ch = NULL; bool mac_header_xmit = false; struct tc_mirred *parm; struct tcf_mirred *m; @@ -158,18 +159,20 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; + m = to_mirred(*a); spin_lock_bh(&m->tcf_lock); - m->tcf_action = parm->action; - m->tcfm_eaction = parm->eaction; if (parm->ifindex) { dev = dev_get_by_index(net, parm->ifindex); if (!dev) { spin_unlock_bh(&m->tcf_lock); - tcf_idr_release(*a, bind); - return -ENODEV; + err = -ENODEV; + goto put_chain; } mac_header_xmit = dev_is_mac_header_xmit(dev); rcu_swap_protected(m->tcfm_dev, dev, @@ -178,7 +181,11 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, dev_put(dev); m->tcfm_mac_header_xmit = mac_header_xmit; } + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); + m->tcfm_eaction = parm->eaction; spin_unlock_bh(&m->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) { spin_lock(&mirred_list_lock); @@ -189,6 +196,12 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, } return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json index db49fd0f8445..6e5fb3d25681 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json @@ -434,5 +434,30 @@ "teardown": [ "$TC actions flush action mirred" ] + }, + { + "id": "2a9a", + "name": "Replace mirred action with invalid goto chain control", + "category": [ + "actions", + "mirred" + ], + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + "$TC actions add action mirred ingress mirror dev lo drop index 90" + ], + "cmdUnderTest": "$TC actions replace action mirred ingress mirror dev lo goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions get action mirred index 90", + "matchPattern": "action order [0-9]*: mirred \\(Ingress Mirror to device lo\\) drop.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action mirred" + ] } ] -- cgit From c53075ea5d3c44849992523d5d83e2810d05a00e Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:05 +0100 Subject: net/sched: act_connmark: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action connmark pass index 90 # tc actions replace action connmark \ > goto chain 42 index 90 cookie c1a0c1a0 # tc actions show action connmark had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: connmark zone 0 goto chain 42 index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 0 PID: 302 Comm: kworker/0:2 Not tainted 5.0.0-rc4.gotochain_crash+ #533 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Workqueue: ipv6_addrconf addrconf_dad_work RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffff9bea406c3ad0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff8c5dfc009f00 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff9bea406c3a80 RDI: ffff8c5dfb9d6ec0 RBP: ffff9bea406c3b70 R08: ffff8c5dfda222a0 R09: ffffffff90933c3c R10: 0000000000000000 R11: 0000000092793f7d R12: ffff8c5df48b3c00 R13: ffff8c5df48b3c08 R14: 0000000000000001 R15: ffff8c5dfb9d6e40 FS: 0000000000000000(0000) GS:ffff8c5dfda00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000062e0e006 CR4: 00000000001606f0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ndisc_next_option+0x50/0x50 ? ___neigh_create+0x4d5/0x680 ? ip6_finish_output2+0x1b5/0x590 ip6_finish_output2+0x1b5/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.28+0x79/0xc0 ndisc_send_skb+0x248/0x2e0 ndisc_send_ns+0xf8/0x200 ? addrconf_dad_work+0x389/0x4b0 addrconf_dad_work+0x389/0x4b0 ? __switch_to_asm+0x34/0x70 ? process_one_work+0x195/0x380 ? addrconf_dad_completed+0x370/0x370 process_one_work+0x195/0x380 worker_thread+0x30/0x390 ? process_one_work+0x380/0x380 kthread+0x113/0x130 ? kthread_park+0x90/0x90 ret_from_fork+0x35/0x40 Modules linked in: act_connmark nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 veth ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 crct10dif_pclmul mbcache crc32_pclmul jbd2 snd_hda_codec_generic ghash_clmulni_intel snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_seq snd_seq_device snd_pcm aesni_intel snd_timer crypto_simd cryptd snd glue_helper joydev virtio_balloon pcspkr soundcore i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper virtio_net net_failover syscopyarea virtio_blk failover virtio_console sysfillrect sysimgblt fb_sys_fops ttm drm ata_piix crc32c_intel serio_raw libata virtio_pci virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_connmark_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_connmark.c | 21 +++++++++++++++--- .../tc-testing/tc-tests/actions/connmark.json | 25 ++++++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 44aa046a92ea..32ae0cd6e31c 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -102,9 +103,10 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, { struct tc_action_net *tn = net_generic(net, connmark_net_id); struct nlattr *tb[TCA_CONNMARK_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tcf_connmark_info *ci; struct tc_connmark *parm; - int ret = 0; + int ret = 0, err; if (!nla) return -EINVAL; @@ -129,7 +131,11 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, } ci = to_connmark(*a); - ci->tcf_action = parm->action; + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, + extack); + if (err < 0) + goto release_idr; + tcf_action_set_ctrlact(*a, parm->action, goto_ch); ci->net = net; ci->zone = parm->zone; @@ -143,15 +149,24 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, + extack); + if (err < 0) + goto release_idr; /* replacing action and zone */ spin_lock_bh(&ci->tcf_lock); - ci->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); ci->zone = parm->zone; spin_unlock_bh(&ci->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); ret = 0; } return ret; +release_idr: + tcf_idr_release(*a, bind); + return err; } static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json index 13147a1f5731..cadde8f41fcd 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json @@ -287,5 +287,30 @@ "teardown": [ "$TC actions flush action connmark" ] + }, + { + "id": "c506", + "name": "Replace connmark with invalid goto chain control", + "category": [ + "actions", + "connmark" + ], + "setup": [ + [ + "$TC actions flush action connmark", + 0, + 1, + 255 + ], + "$TC actions add action connmark pass index 90" + ], + "cmdUnderTest": "$TC actions replace action connmark goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions get action connmark index 90", + "matchPattern": "action order [0-9]+: connmark zone 0 pass.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action connmark" + ] } ] -- cgit From 1e45d043a8bb2ed8a541384db3cc133e92001f0c Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:06 +0100 Subject: net/sched: act_nat: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action nat ingress 1.18.1.1 1.18.2.2 pass index 90 # tc actions replace action nat \ > ingress 1.18.1.1 1.18.2.2 goto chain 42 index 90 cookie c1a0c1a0 # tc actions show action nat had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: nat ingress 1.18.1.1/32 1.18.2.2 goto chain 42 index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 800000002d180067 P4D 800000002d180067 PUD 7cb8b067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 3 PID: 164 Comm: kworker/3:1 Not tainted 5.0.0-rc4.gotochain_crash+ #533 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Workqueue: ipv6_addrconf addrconf_dad_work RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffffae4500e2fad0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff9fa52e28c800 RCX: 0000000001011201 RDX: 0000000000000000 RSI: 0000000000000056 RDI: ffff9fa52ca12800 RBP: ffffae4500e2fb70 R08: 0000000000000022 R09: 000000000000000e R10: 00000000ffffffff R11: 0000000001011201 R12: ffff9fa52cbc9c00 R13: ffff9fa52cbc9c08 R14: 0000000000000001 R15: ffff9fa52ca12780 FS: 0000000000000000(0000) GS:ffff9fa57db80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 0000000073f8c004 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ndisc_next_option+0x50/0x50 ? ___neigh_create+0x4d5/0x680 ? ip6_finish_output2+0x1b5/0x590 ip6_finish_output2+0x1b5/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.28+0x79/0xc0 ndisc_send_skb+0x248/0x2e0 ndisc_send_ns+0xf8/0x200 ? addrconf_dad_work+0x389/0x4b0 addrconf_dad_work+0x389/0x4b0 ? __switch_to_asm+0x34/0x70 ? process_one_work+0x195/0x380 ? addrconf_dad_completed+0x370/0x370 process_one_work+0x195/0x380 worker_thread+0x30/0x390 ? process_one_work+0x380/0x380 kthread+0x113/0x130 ? kthread_park+0x90/0x90 ret_from_fork+0x35/0x40 Modules linked in: act_nat veth ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 crct10dif_pclmul crc32_pclmul ghash_clmulni_intel mbcache jbd2 snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd cryptd glue_helper snd_timer snd joydev virtio_balloon pcspkr soundcore i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs qxl ata_generic pata_acpi drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm virtio_net virtio_blk net_failover failover virtio_console drm crc32c_intel floppy ata_piix libata virtio_pci virtio_ring virtio serio_raw dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_nat_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_nat.c | 12 ++++++++++- .../selftests/tc-testing/tc-tests/actions/nat.json | 25 ++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index de4b493e26d2..e91bb8eb81ec 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -43,6 +44,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, { struct tc_action_net *tn = net_generic(net, nat_net_id); struct nlattr *tb[TCA_NAT_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_nat *parm; int ret = 0, err; struct tcf_nat *p; @@ -77,6 +79,9 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, } else { return err; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; p = to_tcf_nat(*a); spin_lock_bh(&p->tcf_lock); @@ -85,13 +90,18 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, p->mask = parm->mask; p->flags = parm->flags; - p->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); spin_unlock_bh(&p->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +release_idr: + tcf_idr_release(*a, bind); + return err; } static int tcf_nat_act(struct sk_buff *skb, const struct tc_action *a, diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json index 0080dc2fd41c..bc12c1ccad30 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json @@ -589,5 +589,30 @@ "teardown": [ "$TC actions flush action nat" ] + }, + { + "id": "4b12", + "name": "Replace nat action with invalid goto chain control", + "category": [ + "actions", + "nat" + ], + "setup": [ + [ + "$TC actions flush action nat", + 0, + 1, + 255 + ], + "$TC actions add action nat ingress 1.18.1.1 1.18.2.2 drop index 90" + ], + "cmdUnderTest": "$TC actions replace action nat ingress 1.18.1.1 1.18.2.2 goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions get action nat index 90", + "matchPattern": "action order [0-9]+: nat ingress 1.18.1.1/32 1.18.2.2 drop.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action nat" + ] } ] -- cgit From 6ac86ca3524b4549d31c45d11487b0626c334f10 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:07 +0100 Subject: net/sched: act_pedit: validate the control action inside init() the following script: # tc filter add dev crash0 egress matchall \ > action pedit ex munge ip ttl set 10 pass index 90 # tc actions replace action pedit \ > ex munge ip ttl set 10 goto chain 42 index 90 cookie c1a0c1a0 # tc actions show action pedit had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: pedit action goto chain 42 keys 1 index 90 ref 2 bind 1 key #0 at ipv4+8: val 0a000000 mask 00ffffff cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 2 PID: 0 Comm: swapper/2 Not tainted 5.0.0-rc4.gotochain_crash+ #533 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffff94a73db03be0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff94a6ee4c0700 RCX: 000000000000000a RDX: 0000000000000000 RSI: ffff94a6ed22c800 RDI: 0000000000000000 RBP: ffff94a73db03c80 R08: ffff94a7386fa4c8 R09: ffff94a73229ea20 R10: 0000000000000000 R11: 0000000000000000 R12: ffff94a6ed22cb00 R13: ffff94a6ed22cb08 R14: 0000000000000001 R15: ffff94a6ed22c800 FS: 0000000000000000(0000) GS:ffff94a73db00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000007120e002 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ip6_finish_output2+0x369/0x590 ip6_finish_output2+0x369/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.35+0x79/0xc0 mld_sendpack+0x16f/0x220 mld_ifc_timer_expire+0x195/0x2c0 ? igmp6_timer_handler+0x70/0x70 call_timer_fn+0x2b/0x130 run_timer_softirq+0x3e8/0x440 ? tick_sched_timer+0x37/0x70 __do_softirq+0xe3/0x2f5 irq_exit+0xf0/0x100 smp_apic_timer_interrupt+0x6c/0x130 apic_timer_interrupt+0xf/0x20 RIP: 0010:native_safe_halt+0x2/0x10 Code: 4e ff ff ff 7f f3 c3 65 48 8b 04 25 00 5c 01 00 f0 80 48 02 20 48 8b 00 a8 08 74 8b eb c1 90 90 90 90 90 90 90 90 90 90 fb f4 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00 f4 c3 90 90 90 90 90 90 RSP: 0018:ffffab1740387eb8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 RAX: ffffffffb18184f0 RBX: 0000000000000002 RCX: 0000000000000001 RDX: 0000000000000001 RSI: 0000000000000087 RDI: 0000000000000002 RBP: 0000000000000002 R08: 000f168fa695f9a9 R09: 0000000000000020 R10: 0000000000000004 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 ? __sched_text_end+0x1/0x1 default_idle+0x1c/0x140 do_idle+0x1c4/0x280 cpu_startup_entry+0x19/0x20 start_secondary+0x1a7/0x200 secondary_startup_64+0xa4/0xb0 Modules linked in: act_pedit veth ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 mbcache jbd2 crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep aesni_intel snd_hda_core crypto_simd snd_seq cryptd glue_helper snd_seq_device snd_pcm joydev snd_timer pcspkr virtio_balloon snd soundcore i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs qxl ata_generic pata_acpi drm_kms_helper virtio_net net_failover syscopyarea sysfillrect sysimgblt failover virtio_blk fb_sys_fops virtio_console ttm drm crc32c_intel serio_raw ata_piix virtio_pci libata virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_pedit_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_pedit.c | 16 ++++++- .../tc-testing/tc-tests/actions/pedit.json | 51 ++++++++++++++++++++++ 2 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 8ca82aefa11a..287793abfaf9 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -23,6 +23,7 @@ #include #include #include +#include static unsigned int pedit_net_id; static struct tc_action_ops act_pedit_ops; @@ -142,6 +143,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, { struct tc_action_net *tn = net_generic(net, pedit_net_id); struct nlattr *tb[TCA_PEDIT_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_pedit_key *keys = NULL; struct tcf_pedit_key_ex *keys_ex; struct tc_pedit *parm; @@ -205,6 +207,11 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, goto out_free; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) { + ret = err; + goto out_release; + } p = to_pedit(*a); spin_lock_bh(&p->tcf_lock); @@ -214,7 +221,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, if (!keys) { spin_unlock_bh(&p->tcf_lock); ret = -ENOMEM; - goto out_release; + goto put_chain; } kfree(p->tcfp_keys); p->tcfp_keys = keys; @@ -223,16 +230,21 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, memcpy(p->tcfp_keys, parm->keys, ksize); p->tcfp_flags = parm->flags; - p->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); kfree(p->tcfp_keys_ex); p->tcfp_keys_ex = keys_ex; spin_unlock_bh(&p->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); out_release: tcf_idr_release(*a, bind); out_free: diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json new file mode 100644 index 000000000000..b73ceb9e28b1 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json @@ -0,0 +1,51 @@ +[ + { + "id": "319a", + "name": "Add pedit action that mangles IP TTL", + "category": [ + "actions", + "pedit" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ] + ], + "cmdUnderTest": "$TC actions add action pedit ex munge ip ttl set 10", + "expExitCode": "0", + "verifyCmd": "$TC actions ls action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 1.*index 1 ref.*key #0 at ipv4\\+8: val 0a000000 mask 00ffffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + }, + { + "id": "7e67", + "name": "Replace pedit action with invalid goto chain", + "category": [ + "actions", + "pedit" + ], + "setup": [ + [ + "$TC actions flush action pedit", + 0, + 1, + 255 + ], + "$TC actions add action pedit ex munge ip ttl set 10 pass index 90" + ], + "cmdUnderTest": "$TC actions replace action pedit ex munge ip ttl set 10 goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions ls action pedit", + "matchPattern": "action order [0-9]+: pedit action pass keys 1.*index 90 ref.*key #0 at ipv4\\+8: val 0a000000 mask 00ffffff", + "matchCount": "1", + "teardown": [ + "$TC actions flush action pedit" + ] + } +] -- cgit From d6124d6ba697413efc53ff6919b1e0c250f1902a Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:08 +0100 Subject: net/sched: act_police: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action police rate 3mbit burst 250k pass index 90 # tc actions replace action police \ > rate 3mbit burst 250k goto chain 42 index 90 cookie c1a0c1a0 # tc actions show action police rate 3mbit burst had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: police 0x5a rate 3Mbit burst 250Kb mtu 2Kb action goto chain 42 overhead 0b ref 2 bind 1 cookie c1a0c1a0 Then, when crash0 starts transmitting more than 3Mbit/s, the following kernel crash is observed: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 800000007a779067 P4D 800000007a779067 PUD 2ad96067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 3 PID: 5032 Comm: netperf Not tainted 5.0.0-rc4.gotochain_crash+ #533 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffffb0e04064fa60 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff93bb3322cce0 RCX: 0000000000000005 RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff93bb3322cce0 RBP: ffffb0e04064fb00 R08: 0000000000000022 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000001 R12: ffff93bb3beed300 R13: ffff93bb3beed308 R14: 0000000000000001 R15: ffff93bb3b64d000 FS: 00007f0bc6be5740(0000) GS:ffff93bb3db80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000000746a8001 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ipt_do_table+0x31c/0x420 [ip_tables] ? ip_finish_output2+0x16f/0x430 ip_finish_output2+0x16f/0x430 ? ip_output+0x69/0xe0 ip_output+0x69/0xe0 ? ip_forward_options+0x1a0/0x1a0 __tcp_transmit_skb+0x563/0xa40 tcp_write_xmit+0x243/0xfa0 __tcp_push_pending_frames+0x32/0xf0 tcp_sendmsg_locked+0x404/0xd30 tcp_sendmsg+0x27/0x40 sock_sendmsg+0x36/0x40 __sys_sendto+0x10e/0x140 ? __sys_connect+0x87/0xf0 ? syscall_trace_enter+0x1df/0x2e0 ? __audit_syscall_exit+0x216/0x260 __x64_sys_sendto+0x24/0x30 do_syscall_64+0x5b/0x180 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f0bc5ffbafd Code: 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 8b 05 ae c4 2c 00 85 c0 75 2d 45 31 c9 45 31 c0 4c 63 d1 48 63 ff b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 01 c3 48 8b 15 63 63 2c 00 f7 d8 64 89 02 48 RSP: 002b:00007fffef94b7f8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000004000 RCX: 00007f0bc5ffbafd RDX: 0000000000004000 RSI: 00000000017e5420 RDI: 0000000000000004 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000004 R13: 00000000017e51d0 R14: 0000000000000010 R15: 0000000000000006 Modules linked in: act_police veth ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 snd_hda_codec_generic mbcache crct10dif_pclmul jbd2 crc32_pclmul ghash_clmulni_intel snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd cryptd glue_helper snd_timer snd joydev pcspkr virtio_balloon soundcore i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm virtio_blk virtio_net virtio_console net_failover failover crc32c_intel ata_piix libata serio_raw virtio_pci virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_police_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_police.c | 12 ++++++++++- .../tc-testing/tc-tests/actions/police.json | 25 ++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 229eba7925e5..2b8581f6ab51 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -21,6 +21,7 @@ #include #include #include +#include struct tcf_police_params { int tcfp_result; @@ -88,6 +89,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, { int ret = 0, tcfp_result = TC_ACT_OK, err, size; struct nlattr *tb[TCA_POLICE_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_police *parm; struct tcf_police *police; struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; @@ -129,6 +131,9 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; police = to_police(*a); if (parm->rate.rate) { @@ -214,12 +219,14 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, if (new->peak_present) police->tcfp_ptoks = new->tcfp_mtu_ptoks; spin_unlock_bh(&police->tcfp_lock); - police->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(police->params, new, lockdep_is_held(&police->tcf_lock)); spin_unlock_bh(&police->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (new) kfree_rcu(new, rcu); @@ -230,6 +237,9 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, failure: qdisc_put_rtab(P_tab); qdisc_put_rtab(R_tab); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: tcf_idr_release(*a, bind); return err; } diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json index 4086a50a670e..b8268da5adaa 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json @@ -739,5 +739,30 @@ "teardown": [ "$TC actions flush action police" ] + }, + { + "id": "689e", + "name": "Replace police action with invalid goto chain control", + "category": [ + "actions", + "police" + ], + "setup": [ + [ + "$TC actions flush action police", + 0, + 1, + 255 + ], + "$TC actions add action police rate 3mbit burst 250k drop index 90" + ], + "cmdUnderTest": "$TC actions replace action police rate 3mbit burst 250k goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions get action police index 90", + "matchPattern": "action order [0-9]*: police 0x5a rate 3Mbit burst 250Kb mtu 2Kb action drop", + "matchCount": "1", + "teardown": [ + "$TC actions flush action police" + ] } ] -- cgit From e8c87c643ef3603c6e63bdecb67b03d794648493 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:09 +0100 Subject: net/sched: act_sample: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action sample rate 1024 group 4 pass index 90 # tc actions replace action sample \ > rate 1024 group 4 goto chain 42 index 90 cookie c1a0c1a0 # tc actions show action sample had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: sample rate 1/1024 group 4 goto chain 42 index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 8000000079966067 P4D 8000000079966067 PUD 7987b067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 0 PID: 5 Comm: kworker/0:0 Not tainted 5.0.0-rc4.gotochain_crash+ #536 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Workqueue: ipv6_addrconf addrconf_dad_work RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffffbee60033fad0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff99d7ae6e3b00 RCX: 00000000e555df9b RDX: 0000000000000000 RSI: 00000000b0352718 RDI: ffff99d7fda1fcf0 RBP: ffffbee60033fb70 R08: 0000000070731ab1 R09: 0000000000000400 R10: 0000000000000000 R11: ffff99d7ac733838 R12: ffff99d7f3c2be00 R13: ffff99d7f3c2be08 R14: 0000000000000001 R15: ffff99d7f3c2b600 FS: 0000000000000000(0000) GS:ffff99d7fda00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000000797de006 CR4: 00000000001606f0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ndisc_next_option+0x50/0x50 ? ___neigh_create+0x4d5/0x680 ? ip6_finish_output2+0x1b5/0x590 ip6_finish_output2+0x1b5/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.28+0x79/0xc0 ndisc_send_skb+0x248/0x2e0 ndisc_send_ns+0xf8/0x200 ? addrconf_dad_work+0x389/0x4b0 addrconf_dad_work+0x389/0x4b0 ? __switch_to_asm+0x34/0x70 ? process_one_work+0x195/0x380 ? addrconf_dad_completed+0x370/0x370 process_one_work+0x195/0x380 worker_thread+0x30/0x390 ? process_one_work+0x380/0x380 kthread+0x113/0x130 ? kthread_park+0x90/0x90 ret_from_fork+0x35/0x40 Modules linked in: act_sample psample veth ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 crct10dif_pclmul crc32_pclmul ghash_clmulni_intel mbcache jbd2 snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_seq snd_seq_device aesni_intel crypto_simd snd_pcm cryptd glue_helper snd_timer joydev snd pcspkr virtio_balloon i2c_piix4 soundcore nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect virtio_net sysimgblt fb_sys_fops net_failover ttm failover virtio_blk virtio_console drm ata_piix serio_raw crc32c_intel libata virtio_pci virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_sample_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_sample.c | 19 +++++++++++++--- .../tc-testing/tc-tests/actions/sample.json | 25 ++++++++++++++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 36b8adbe935d..4060b0955c97 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -43,6 +44,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, struct tc_action_net *tn = net_generic(net, sample_net_id); struct nlattr *tb[TCA_SAMPLE_MAX + 1]; struct psample_group *psample_group; + struct tcf_chain *goto_ch = NULL; struct tc_sample *parm; u32 psample_group_num; struct tcf_sample *s; @@ -79,18 +81,21 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; psample_group_num = nla_get_u32(tb[TCA_SAMPLE_PSAMPLE_GROUP]); psample_group = psample_group_get(net, psample_group_num); if (!psample_group) { - tcf_idr_release(*a, bind); - return -ENOMEM; + err = -ENOMEM; + goto put_chain; } s = to_sample(*a); spin_lock_bh(&s->tcf_lock); - s->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); s->rate = nla_get_u32(tb[TCA_SAMPLE_RATE]); s->psample_group_num = psample_group_num; RCU_INIT_POINTER(s->psample_group, psample_group); @@ -100,10 +105,18 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, s->trunc_size = nla_get_u32(tb[TCA_SAMPLE_TRUNC_SIZE]); } spin_unlock_bh(&s->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static void tcf_sample_cleanup(struct tc_action *a) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json index 3aca33c00039..27f0acaed880 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json @@ -584,5 +584,30 @@ "teardown": [ "$TC actions flush action sample" ] + }, + { + "id": "0a6e", + "name": "Replace sample action with invalid goto chain control", + "category": [ + "actions", + "sample" + ], + "setup": [ + [ + "$TC actions flush action sample", + 0, + 1, + 255 + ], + "$TC actions add action sample rate 1024 group 4 pass index 90" + ], + "cmdUnderTest": "$TC actions replace action sample rate 1024 group 7 goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions list action sample", + "matchPattern": "action order [0-9]+: sample rate 1/1024 group 4 pass.*index 90", + "matchCount": "1", + "teardown": [ + "$TC actions flush action sample" + ] } ] -- cgit From 4b006b0c139e486773335f4c23b4d82348cfeb04 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:10 +0100 Subject: net/sched: act_simple: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action simple sdata hello pass index 90 # tc actions replace action simple \ > sdata world goto chain 42 index 90 cookie c1a0c1a0 # tc action show action simple had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: Simple index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 800000006a6fb067 P4D 800000006a6fb067 PUD 6aed6067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 2 PID: 3241 Comm: kworker/2:0 Not tainted 5.0.0-rc4.gotochain_crash+ #536 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Workqueue: ipv6_addrconf addrconf_dad_work RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffffbe6781763ad0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff9e59bdb80e00 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff9e59b4716738 RDI: ffff9e59ab12d140 RBP: ffffbe6781763b70 R08: 0000000000000234 R09: 0000000000aaaaaa R10: 0000000000000000 R11: ffff9e59b247cd50 R12: ffff9e59b112f100 R13: ffff9e59b112f108 R14: 0000000000000001 R15: ffff9e59ab12d0c0 FS: 0000000000000000(0000) GS:ffff9e59b4700000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000006af92004 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ndisc_next_option+0x50/0x50 ? ___neigh_create+0x4d5/0x680 ? ip6_finish_output2+0x1b5/0x590 ip6_finish_output2+0x1b5/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.28+0x79/0xc0 ndisc_send_skb+0x248/0x2e0 ndisc_send_ns+0xf8/0x200 ? addrconf_dad_work+0x389/0x4b0 addrconf_dad_work+0x389/0x4b0 ? __switch_to_asm+0x34/0x70 ? process_one_work+0x195/0x380 ? addrconf_dad_completed+0x370/0x370 process_one_work+0x195/0x380 worker_thread+0x30/0x390 ? process_one_work+0x380/0x380 kthread+0x113/0x130 ? kthread_park+0x90/0x90 ret_from_fork+0x35/0x40 Modules linked in: act_simple veth ip6table_filter ip6_tables iptable_filter binfmt_misc crct10dif_pclmul crc32_pclmul ghash_clmulni_intel ext4 snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep mbcache snd_hda_core jbd2 snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd cryptd snd_timer glue_helper snd joydev virtio_balloon pcspkr soundcore i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops virtio_net ttm net_failover virtio_console virtio_blk failover drm crc32c_intel serio_raw floppy ata_piix libata virtio_pci virtio_ring virtio dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_simple_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_simple.c | 52 ++++++++++++++++------ .../tc-testing/tc-tests/actions/simple.json | 25 +++++++++++ 2 files changed, 63 insertions(+), 14 deletions(-) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 4916dc3e3668..23c8ca5615e5 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -60,14 +61,26 @@ static int alloc_defdata(struct tcf_defact *d, const struct nlattr *defdata) return 0; } -static void reset_policy(struct tcf_defact *d, const struct nlattr *defdata, - struct tc_defact *p) +static int reset_policy(struct tc_action *a, const struct nlattr *defdata, + struct tc_defact *p, struct tcf_proto *tp, + struct netlink_ext_ack *extack) { + struct tcf_chain *goto_ch = NULL; + struct tcf_defact *d; + int err; + + err = tcf_action_check_ctrlact(p->action, tp, &goto_ch, extack); + if (err < 0) + return err; + d = to_defact(a); spin_lock_bh(&d->tcf_lock); - d->tcf_action = p->action; + goto_ch = tcf_action_set_ctrlact(a, p->action, goto_ch); memset(d->tcfd_defdata, 0, SIMP_MAX_DATA); nla_strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA); spin_unlock_bh(&d->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + return 0; } static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = { @@ -82,6 +95,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, { struct tc_action_net *tn = net_generic(net, simp_net_id); struct nlattr *tb[TCA_DEF_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_defact *parm; struct tcf_defact *d; bool exists = false; @@ -122,27 +136,37 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, } d = to_defact(*a); - ret = alloc_defdata(d, tb[TCA_DEF_DATA]); - if (ret < 0) { - tcf_idr_release(*a, bind); - return ret; - } - d->tcf_action = parm->action; + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, + extack); + if (err < 0) + goto release_idr; + + err = alloc_defdata(d, tb[TCA_DEF_DATA]); + if (err < 0) + goto put_chain; + + tcf_action_set_ctrlact(*a, parm->action, goto_ch); ret = ACT_P_CREATED; } else { - d = to_defact(*a); - if (!ovr) { - tcf_idr_release(*a, bind); - return -EEXIST; + err = -EEXIST; + goto release_idr; } - reset_policy(d, tb[TCA_DEF_DATA], parm); + err = reset_policy(*a, tb[TCA_DEF_DATA], parm, tp, extack); + if (err) + goto release_idr; } if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json index e89a7aa4012d..8e8c1ae12260 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json @@ -126,5 +126,30 @@ "teardown": [ "" ] + }, + { + "id": "b776", + "name": "Replace simple action with invalid goto chain control", + "category": [ + "actions", + "simple" + ], + "setup": [ + [ + "$TC actions flush action simple", + 0, + 1, + 255 + ], + "$TC actions add action simple sdata \"hello\" pass index 90" + ], + "cmdUnderTest": "$TC actions replace action simple sdata \"world\" goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions list action simple", + "matchPattern": "action order [0-9]*: Simple .*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action simple" + ] } ] -- cgit From ec7727bb24b01e96b0c46068addf355ee4f794d8 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:11 +0100 Subject: net/sched: act_skbedit: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action skbedit ptype host pass index 90 # tc actions replace action skbedit \ > ptype host goto chain 42 index 90 cookie c1a0c1a0 # tc actions show action skbedit had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: skbedit ptype host goto chain 42 index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 0 P4D 0 Oops: 0000 [#1] SMP PTI CPU: 3 PID: 3467 Comm: kworker/3:3 Not tainted 5.0.0-rc4.gotochain_crash+ #536 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Workqueue: ipv6_addrconf addrconf_dad_work RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffffb50a81e1fad0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff9aa47ba4ea00 RCX: 0000000000000001 RDX: 0000000000000000 RSI: ffff9aa469eeb3c0 RDI: ffff9aa47ba4ea00 RBP: ffffb50a81e1fb70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: ffff9aa47bce0638 R12: ffff9aa4793b0c00 R13: ffff9aa4793b0c08 R14: 0000000000000001 R15: ffff9aa469eeb3c0 FS: 0000000000000000(0000) GS:ffff9aa474780000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000007360e005 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ndisc_next_option+0x50/0x50 ? ___neigh_create+0x4d5/0x680 ? ip6_finish_output2+0x1b5/0x590 ip6_finish_output2+0x1b5/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.28+0x79/0xc0 ndisc_send_skb+0x248/0x2e0 ndisc_send_ns+0xf8/0x200 ? addrconf_dad_work+0x389/0x4b0 addrconf_dad_work+0x389/0x4b0 ? __switch_to_asm+0x34/0x70 ? process_one_work+0x195/0x380 ? addrconf_dad_completed+0x370/0x370 process_one_work+0x195/0x380 worker_thread+0x30/0x390 ? process_one_work+0x380/0x380 kthread+0x113/0x130 ? kthread_park+0x90/0x90 ret_from_fork+0x35/0x40 Modules linked in: act_skbedit veth ip6table_filter ip6_tables iptable_filter binfmt_misc crct10dif_pclmul crc32_pclmul ghash_clmulni_intel ext4 snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep mbcache snd_hda_core jbd2 snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd cryptd snd_timer glue_helper snd joydev soundcore pcspkr virtio_balloon i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm virtio_net net_failover drm failover virtio_blk virtio_console ata_piix virtio_pci crc32c_intel serio_raw libata virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_skbedit_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_skbedit.c | 19 +++++++++++++--- .../tc-testing/tc-tests/actions/skbedit.json | 25 ++++++++++++++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 4566eff3d027..7e1d261a31d2 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -102,6 +103,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct tc_action_net *tn = net_generic(net, skbedit_net_id); struct tcf_skbedit_params *params_new; struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tc_skbedit *parm; struct tcf_skbedit *d; u32 flags = 0, *priority = NULL, *mark = NULL, *mask = NULL; @@ -187,11 +189,14 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, return -EEXIST; } } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); if (unlikely(!params_new)) { - tcf_idr_release(*a, bind); - return -ENOMEM; + err = -ENOMEM; + goto put_chain; } params_new->flags = flags; @@ -209,16 +214,24 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, params_new->mask = *mask; spin_lock_bh(&d->tcf_lock); - d->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(d->params, params_new, lockdep_is_held(&d->tcf_lock)); spin_unlock_bh(&d->tcf_lock); if (params_new) kfree_rcu(params_new, rcu); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json index 5aaf593b914a..ecd96eda7f6a 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json @@ -484,5 +484,30 @@ "teardown": [ "$TC actions flush action skbedit" ] + }, + { + "id": "1b2b", + "name": "Replace skbedit action with invalid goto_chain control", + "category": [ + "actions", + "skbedit" + ], + "setup": [ + [ + "$TC actions flush action skbedit", + 0, + 1, + 255 + ], + "$TC actions add action skbedit ptype host pass index 90" + ], + "cmdUnderTest": "$TC actions replace action skbedit ptype host goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions list action skbedit", + "matchPattern": "action order [0-9]*: skbedit ptype host pass.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbedit" + ] } ] -- cgit From 7c3d825d12c5e6056ea73c0a202cbdef9d9ab9e6 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:12 +0100 Subject: net/sched: act_skbmod: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action skbmod set smac 00:c1:a0:c1:a0:00 pass index 90 # tc actions replace action skbmod \ > set smac 00:c1:a0:c1:a0:00 goto chain 42 index 90 cookie c1a0c1a0 # tc actions show action skbmod had the following output: src MAC address <00:c1:a0:c1:a0:00> src MAC address <00:c1:a0:c1:a0:00> Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: skbmod goto chain 42 set smac 00:c1:a0:c1:a0:00 index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 800000002d5c7067 P4D 800000002d5c7067 PUD 77e16067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 3 PID: 0 Comm: swapper/3 Not tainted 5.0.0-rc4.gotochain_crash+ #536 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffff8987ffd83be0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff8987aeb68800 RCX: ffff8987fa263640 RDX: 0000000000000000 RSI: ffff8987f51c8802 RDI: 00000000000000a0 RBP: ffff8987ffd83c80 R08: ffff8987f939bac8 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff8987f5c77d00 R13: ffff8987f5c77d08 R14: 0000000000000001 R15: ffff8987f0c29f00 FS: 0000000000000000(0000) GS:ffff8987ffd80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000007832c004 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ip6_finish_output2+0x369/0x590 ip6_finish_output2+0x369/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.35+0x79/0xc0 mld_sendpack+0x16f/0x220 mld_ifc_timer_expire+0x195/0x2c0 ? igmp6_timer_handler+0x70/0x70 call_timer_fn+0x2b/0x130 run_timer_softirq+0x3e8/0x440 ? tick_sched_timer+0x37/0x70 __do_softirq+0xe3/0x2f5 irq_exit+0xf0/0x100 smp_apic_timer_interrupt+0x6c/0x130 apic_timer_interrupt+0xf/0x20 RIP: 0010:native_safe_halt+0x2/0x10 Code: 56 ff ff ff 7f f3 c3 65 48 8b 04 25 00 5c 01 00 f0 80 48 02 20 48 8b 00 a8 08 74 8b eb c1 90 90 90 90 90 90 90 90 90 90 fb f4 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00 f4 c3 90 90 90 90 90 90 RSP: 0018:ffffa2a1c038feb8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 RAX: ffffffffa94184f0 RBX: 0000000000000003 RCX: 0000000000000001 RDX: 0000000000000001 RSI: 0000000000000087 RDI: 0000000000000003 RBP: 0000000000000003 R08: 001123cfc2ba71ac R09: 0000000000000000 R10: 0000000000000000 R11: 00000000000f4240 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 ? __sched_text_end+0x1/0x1 default_idle+0x1c/0x140 do_idle+0x1c4/0x280 cpu_startup_entry+0x19/0x20 start_secondary+0x1a7/0x200 secondary_startup_64+0xa4/0xb0 Modules linked in: act_skbmod veth ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 crct10dif_pclmul crc32_pclmul ghash_clmulni_intel mbcache jbd2 snd_hda_codec_generic snd_hda_intel snd_hda_codec snd_hwdep snd_hda_core snd_seq snd_seq_device aesni_intel crypto_simd cryptd glue_helper snd_pcm joydev pcspkr virtio_balloon snd_timer snd i2c_piix4 soundcore nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect virtio_net sysimgblt fb_sys_fops net_failover virtio_console ttm virtio_blk failover drm crc32c_intel serio_raw ata_piix virtio_pci libata virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_skbmod_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_skbmod.c | 19 +++++++++++++--- .../tc-testing/tc-tests/actions/skbmod.json | 25 ++++++++++++++++++++++ 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index b9ab2c8f07f1..1d4c324d0a42 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -88,6 +89,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, struct tc_action_net *tn = net_generic(net, skbmod_net_id); struct nlattr *tb[TCA_SKBMOD_MAX + 1]; struct tcf_skbmod_params *p, *p_old; + struct tcf_chain *goto_ch = NULL; struct tc_skbmod *parm; struct tcf_skbmod *d; bool exists = false; @@ -154,21 +156,24 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; d = to_skbmod(*a); p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL); if (unlikely(!p)) { - tcf_idr_release(*a, bind); - return -ENOMEM; + err = -ENOMEM; + goto put_chain; } p->flags = lflags; - d->tcf_action = parm->action; if (ovr) spin_lock_bh(&d->tcf_lock); /* Protected by tcf_lock if overwriting existing action. */ + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); p_old = rcu_dereference_protected(d->skbmod_p, 1); if (lflags & SKBMOD_F_DMAC) @@ -184,10 +189,18 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, if (p_old) kfree_rcu(p_old, rcu); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static void tcf_skbmod_cleanup(struct tc_action *a) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json index fe3326e939c1..6eb4c4f97060 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json @@ -392,5 +392,30 @@ "teardown": [ "$TC actions flush action skbmod" ] + }, + { + "id": "b651", + "name": "Replace skbmod action with invalid goto_chain control", + "category": [ + "actions", + "skbmod" + ], + "setup": [ + [ + "$TC actions flush action skbmod", + 0, + 1, + 255 + ], + "$TC actions add action skbmod set etype 0x1111 pass index 90" + ], + "cmdUnderTest": "$TC actions replace action skbmod set etype 0x1111 goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions ls action skbmod", + "matchPattern": "action order [0-9]*: skbmod pass set etype 0x1111\\s+index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action skbmod" + ] } ] -- cgit From e5fdabacbffc5d321bf9f51410fe0db0834606eb Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:13 +0100 Subject: net/sched: act_tunnel_key: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.2 dst_port 3128 \ > nocsum id 1 pass index 90 # tc actions replace action tunnel_key \ > set src_ip 10.10.10.1 dst_ip 20.20.2 dst_port 3128 nocsum id 1 \ > goto chain 42 index 90 cookie c1a0c1a0 # tc actions show action tunnel_key had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.2.0 key_id 1 dst_port 3128 nocsum goto chain 42 index 90 ref 2 bind 1 cookie c1a0c1a0 then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 800000002aba4067 P4D 800000002aba4067 PUD 795f9067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 3 PID: 0 Comm: swapper/3 Not tainted 5.0.0-rc4.gotochain_crash+ #536 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffff9346bdb83be0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff9346bb795c00 RCX: 0000000000000002 RDX: 0000000000000000 RSI: ffff93466c881700 RDI: 0000000000000246 RBP: ffff9346bdb83c80 R08: ffff9346b3e1e0c8 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff9346b978f000 R13: ffff9346b978f008 R14: 0000000000000001 R15: ffff93466dceeb40 FS: 0000000000000000(0000) GS:ffff9346bdb80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 000000007a6c2002 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ip6_finish_output2+0x369/0x590 ip6_finish_output2+0x369/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.35+0x79/0xc0 mld_sendpack+0x16f/0x220 mld_ifc_timer_expire+0x195/0x2c0 ? igmp6_timer_handler+0x70/0x70 call_timer_fn+0x2b/0x130 run_timer_softirq+0x3e8/0x440 ? tick_sched_timer+0x37/0x70 __do_softirq+0xe3/0x2f5 irq_exit+0xf0/0x100 smp_apic_timer_interrupt+0x6c/0x130 apic_timer_interrupt+0xf/0x20 RIP: 0010:native_safe_halt+0x2/0x10 Code: 55 ff ff ff 7f f3 c3 65 48 8b 04 25 00 5c 01 00 f0 80 48 02 20 48 8b 00 a8 08 74 8b eb c1 90 90 90 90 90 90 90 90 90 90 fb f4 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00 f4 c3 90 90 90 90 90 90 RSP: 0018:ffffa48a8038feb8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 RAX: ffffffffaa8184f0 RBX: 0000000000000003 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000087 RDI: 0000000000000003 RBP: 0000000000000003 R08: 0011251c6fcfac49 R09: ffff9346b995be00 R10: ffffa48a805e7ce8 R11: 00000000024c38dd R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 ? __sched_text_end+0x1/0x1 default_idle+0x1c/0x140 do_idle+0x1c4/0x280 cpu_startup_entry+0x19/0x20 start_secondary+0x1a7/0x200 secondary_startup_64+0xa4/0xb0 Modules linked in: act_tunnel_key veth ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 crct10dif_pclmul crc32_pclmul snd_hda_codec_generic ghash_clmulni_intel mbcache snd_hda_intel jbd2 snd_hda_codec snd_hwdep snd_hda_core snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd cryptd glue_helper joydev snd_timer snd pcspkr virtio_balloon soundcore i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect virtio_net sysimgblt fb_sys_fops ttm net_failover virtio_console virtio_blk failover drm serio_raw crc32c_intel ata_piix virtio_pci floppy virtio_ring libata virtio dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_tunnel_key_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_tunnel_key.c | 18 ++++++++++++++-- .../tc-testing/tc-tests/actions/tunnel_key.json | 25 ++++++++++++++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index fc295d91559a..d5aaf90a3971 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -217,6 +218,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; struct tcf_tunnel_key_params *params_new; struct metadata_dst *metadata = NULL; + struct tcf_chain *goto_ch = NULL; struct tc_tunnel_key *parm; struct tcf_tunnel_key *t; bool exists = false; @@ -360,6 +362,12 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, goto release_tun_meta; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) { + ret = err; + exists = true; + goto release_tun_meta; + } t = to_tunnel_key(*a); params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); @@ -367,23 +375,29 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, NL_SET_ERR_MSG(extack, "Cannot allocate tunnel key parameters"); ret = -ENOMEM; exists = true; - goto release_tun_meta; + goto put_chain; } params_new->tcft_action = parm->t_action; params_new->tcft_enc_metadata = metadata; spin_lock_bh(&t->tcf_lock); - t->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(t->params, params_new, lockdep_is_held(&t->tcf_lock)); spin_unlock_bh(&t->tcf_lock); tunnel_key_release_params(params_new); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); + release_tun_meta: if (metadata) dst_release(&metadata->dst); diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json index e7e15a7336b6..28453a445fdb 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json @@ -884,5 +884,30 @@ "teardown": [ "$TC actions flush action tunnel_key" ] + }, + { + "id": "8242", + "name": "Replace tunnel_key set action with invalid goto chain", + "category": [ + "actions", + "tunnel_key" + ], + "setup": [ + [ + "$TC actions flush action tunnel_key", + 0, + 1, + 255 + ], + "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 dst_port 3128 nocsum id 1 pass index 90" + ], + "cmdUnderTest": "$TC actions replace action tunnel_key set src_ip 10.10.10.2 dst_ip 20.20.20.1 dst_port 3129 id 2 csum goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions get action tunnel_key index 90", + "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 10.10.10.1.*dst_ip 20.20.20.2.*key_id 1.*dst_port 3128.*csum pass.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action tunnel_key" + ] } ] -- cgit From 7e0c8892df7d0316ec853adbf84db536cd53258c Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:14 +0100 Subject: net/sched: act_vlan: validate the control action inside init() the following script: # tc qdisc add dev crash0 clsact # tc filter add dev crash0 egress matchall \ > action vlan pop pass index 90 # tc actions replace action vlan \ > pop goto chain 42 index 90 cookie c1a0c1a0 # tc actions show action vlan had the following output: Error: Failed to init TC action chain. We have an error talking to the kernel total acts 1 action order 0: vlan pop goto chain 42 index 90 ref 2 bind 1 cookie c1a0c1a0 Then, the first packet transmitted by crash0 made the kernel crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 #PF error: [normal kernel read fault] PGD 800000007974f067 P4D 800000007974f067 PUD 79638067 PMD 0 Oops: 0000 [#1] SMP PTI CPU: 3 PID: 0 Comm: swapper/3 Not tainted 5.0.0-rc4.gotochain_crash+ #536 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 RIP: 0010:tcf_action_exec+0xb8/0x100 Code: 00 00 00 20 74 1d 83 f8 03 75 09 49 83 c4 08 4d 39 ec 75 bc 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 49 8b 97 a8 00 00 00 <48> 8b 12 48 89 55 00 48 83 c4 10 5b 5d 41 5c 41 5d 41 5e 41 5f c3 RSP: 0018:ffff982dfdb83be0 EFLAGS: 00010246 RAX: 000000002000002a RBX: ffff982dfc55db00 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff982df97099c0 RDI: ffff982dfc55db00 RBP: ffff982dfdb83c80 R08: ffff982df983fec8 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffff982df5aacd00 R13: ffff982df5aacd08 R14: 0000000000000001 R15: ffff982df97099c0 FS: 0000000000000000(0000) GS:ffff982dfdb80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000000796d0005 CR4: 00000000001606e0 Call Trace: tcf_classify+0x58/0x120 __dev_queue_xmit+0x40a/0x890 ? ip6_finish_output2+0x369/0x590 ip6_finish_output2+0x369/0x590 ? ip6_output+0x68/0x110 ip6_output+0x68/0x110 ? nf_hook.constprop.35+0x79/0xc0 mld_sendpack+0x16f/0x220 mld_ifc_timer_expire+0x195/0x2c0 ? igmp6_timer_handler+0x70/0x70 call_timer_fn+0x2b/0x130 run_timer_softirq+0x3e8/0x440 ? enqueue_hrtimer+0x39/0x90 __do_softirq+0xe3/0x2f5 irq_exit+0xf0/0x100 smp_apic_timer_interrupt+0x6c/0x130 apic_timer_interrupt+0xf/0x20 RIP: 0010:native_safe_halt+0x2/0x10 Code: 7b ff ff ff 7f f3 c3 65 48 8b 04 25 00 5c 01 00 f0 80 48 02 20 48 8b 00 a8 08 74 8b eb c1 90 90 90 90 90 90 90 90 90 90 fb f4 0f 1f 00 66 2e 0f 1f 84 00 00 00 00 00 f4 c3 90 90 90 90 90 90 RSP: 0018:ffffa4714038feb8 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13 RAX: ffffffff840184f0 RBX: 0000000000000003 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000000 RDI: 0000001e57d3f387 RBP: 0000000000000003 R08: 001125d9ca39e1eb R09: 0000000000000000 R10: 000000000000027d R11: 000000000009f400 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 ? __sched_text_end+0x1/0x1 default_idle+0x1c/0x140 do_idle+0x1c4/0x280 cpu_startup_entry+0x19/0x20 start_secondary+0x1a7/0x200 secondary_startup_64+0xa4/0xb0 Modules linked in: act_vlan veth ip6table_filter ip6_tables iptable_filter binfmt_misc ext4 snd_hda_codec_generic mbcache crct10dif_pclmul jbd2 snd_hda_intel crc32_pclmul snd_hda_codec ghash_clmulni_intel snd_hwdep snd_hda_core snd_seq snd_seq_device snd_pcm aesni_intel crypto_simd cryptd glue_helper joydev snd_timer virtio_balloon snd pcspkr soundcore i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs ata_generic pata_acpi qxl drm_kms_helper syscopyarea sysfillrect sysimgblt virtio_net fb_sys_fops virtio_blk ttm net_failover virtio_console failover ata_piix drm libata crc32c_intel virtio_pci serio_raw virtio_ring virtio floppy dm_mirror dm_region_hash dm_log dm_mod CR2: 0000000000000000 Validating the control action within tcf_vlan_init() proved to fix the above issue. A TDC selftest is added to verify the correct behavior. Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain") Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- net/sched/act_vlan.c | 20 ++++++++++++++--- .../tc-testing/tc-tests/actions/vlan.json | 25 ++++++++++++++++++++++ 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 4651ee15e35d..0f40d0a74423 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -109,6 +110,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, { struct tc_action_net *tn = net_generic(net, vlan_net_id); struct nlattr *tb[TCA_VLAN_MAX + 1]; + struct tcf_chain *goto_ch = NULL; struct tcf_vlan_params *p; struct tc_vlan *parm; struct tcf_vlan *v; @@ -200,12 +202,16 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, return -EEXIST; } + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); + if (err < 0) + goto release_idr; + v = to_vlan(*a); p = kzalloc(sizeof(*p), GFP_KERNEL); if (!p) { - tcf_idr_release(*a, bind); - return -ENOMEM; + err = -ENOMEM; + goto put_chain; } p->tcfv_action = action; @@ -214,16 +220,24 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, p->tcfv_push_proto = push_proto; spin_lock_bh(&v->tcf_lock); - v->tcf_action = parm->action; + goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); rcu_swap_protected(v->vlan_p, p, lockdep_is_held(&v->tcf_lock)); spin_unlock_bh(&v->tcf_lock); + if (goto_ch) + tcf_chain_put_by_act(goto_ch); if (p) kfree_rcu(p, rcu); if (ret == ACT_P_CREATED) tcf_idr_insert(tn, *a); return ret; +put_chain: + if (goto_ch) + tcf_chain_put_by_act(goto_ch); +release_idr: + tcf_idr_release(*a, bind); + return err; } static void tcf_vlan_cleanup(struct tc_action *a) diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json index 69ea09eefffc..cc7c7d758008 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json @@ -688,5 +688,30 @@ "teardown": [ "$TC actions flush action vlan" ] + }, + { + "id": "e394", + "name": "Replace vlan push action with invalid goto chain control", + "category": [ + "actions", + "vlan" + ], + "setup": [ + [ + "$TC actions flush action vlan", + 0, + 1, + 255 + ], + "$TC actions add action vlan push id 500 pass index 90" + ], + "cmdUnderTest": "$TC actions replace action vlan push id 500 goto chain 42 index 90 cookie c1a0c1a0", + "expExitCode": "255", + "verifyCmd": "$TC actions get action vlan index 90", + "matchPattern": "action order [0-9]+: vlan.*push id 500 protocol 802.1Q priority 0 pass.*index 90 ref", + "matchCount": "1", + "teardown": [ + "$TC actions flush action vlan" + ] } ] -- cgit From fe384e2fa36ca084a456fd30558cccc75b4b3fbd Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:15 +0100 Subject: net/sched: don't dereference a->goto_chain to read the chain index callers of tcf_gact_goto_chain_index() can potentially read an old value of the chain index, or even dereference a NULL 'goto_chain' pointer, because 'goto_chain' and 'tcfa_action' are read in the traffic path without caring of concurrent write in the control path. The most recent value of chain index can be read also from a->tcfa_action (it's encoded there together with TC_ACT_GOTO_CHAIN bits), so we don't really need to dereference 'goto_chain': just read the chain id from the control action. Fixes: e457d86ada27 ("net: sched: add couple of goto_chain helpers") Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- include/net/tc_act/tc_gact.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index ee8d005f56fc..eb8f01c819e6 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -56,7 +56,7 @@ static inline bool is_tcf_gact_goto_chain(const struct tc_action *a) static inline u32 tcf_gact_goto_chain_index(const struct tc_action *a) { - return a->goto_chain->index; + return READ_ONCE(a->tcfa_action) & TC_ACT_EXT_VAL_MASK; } #endif /* __NET_TC_GACT_H */ -- cgit From ee3bbfe806cdb46b02cda63626cb50a7a7b19fc5 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 20 Mar 2019 15:00:16 +0100 Subject: net/sched: let actions use RCU to access 'goto_chain' use RCU when accessing the action chain, to avoid use after free in the traffic path when 'goto chain' is replaced on existing TC actions (see script below). Since the control action is read in the traffic path without holding the action spinlock, we need to explicitly ensure that a->goto_chain is not NULL before dereferencing (i.e it's not sufficient to rely on the value of TC_ACT_GOTO_CHAIN bits). Not doing so caused NULL dereferences in tcf_action_goto_chain_exec() when the following script: # tc chain add dev dd0 chain 42 ingress protocol ip flower \ > ip_proto udp action pass index 4 # tc filter add dev dd0 ingress protocol ip flower \ > ip_proto udp action csum udp goto chain 42 index 66 # tc chain del dev dd0 chain 42 ingress (start UDP traffic towards dd0) # tc action replace action csum udp pass index 66 was run repeatedly for several hours. Suggested-by: Cong Wang Suggested-by: Vlad Buslov Signed-off-by: Davide Caratti Signed-off-by: David S. Miller --- include/net/act_api.h | 2 +- include/net/sch_generic.h | 1 + net/sched/act_api.c | 18 ++++++++++-------- net/sched/cls_api.c | 2 +- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/include/net/act_api.h b/include/net/act_api.h index 54fbb49bd08a..c61a1bf4e3de 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -39,7 +39,7 @@ struct tc_action { struct gnet_stats_basic_cpu __percpu *cpu_bstats_hw; struct gnet_stats_queue __percpu *cpu_qstats; struct tc_cookie __rcu *act_cookie; - struct tcf_chain *goto_chain; + struct tcf_chain __rcu *goto_chain; }; #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 31284c078d06..7d1a0483a17b 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -378,6 +378,7 @@ struct tcf_chain { bool flushing; const struct tcf_proto_ops *tmplt_ops; void *tmplt_priv; + struct rcu_head rcu; }; struct tcf_block { diff --git a/net/sched/act_api.c b/net/sched/act_api.c index fe67b98ac641..5a87e271d35a 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -31,7 +31,7 @@ static void tcf_action_goto_chain_exec(const struct tc_action *a, struct tcf_result *res) { - const struct tcf_chain *chain = a->goto_chain; + const struct tcf_chain *chain = rcu_dereference_bh(a->goto_chain); res->goto_tp = rcu_dereference_bh(chain->filter_chain); } @@ -91,13 +91,11 @@ end: EXPORT_SYMBOL(tcf_action_check_ctrlact); struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action, - struct tcf_chain *newchain) + struct tcf_chain *goto_chain) { - struct tcf_chain *oldchain = a->goto_chain; - a->tcfa_action = action; - a->goto_chain = newchain; - return oldchain; + rcu_swap_protected(a->goto_chain, goto_chain, 1); + return goto_chain; } EXPORT_SYMBOL(tcf_action_set_ctrlact); @@ -108,7 +106,7 @@ EXPORT_SYMBOL(tcf_action_set_ctrlact); */ static void free_tcf(struct tc_action *p) { - struct tcf_chain *chain = p->goto_chain; + struct tcf_chain *chain = rcu_dereference_protected(p->goto_chain, 1); free_percpu(p->cpu_bstats); free_percpu(p->cpu_bstats_hw); @@ -686,6 +684,10 @@ repeat: return TC_ACT_OK; } } else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) { + if (unlikely(!rcu_access_pointer(a->goto_chain))) { + net_warn_ratelimited("can't go to NULL chain!\n"); + return TC_ACT_SHOT; + } tcf_action_goto_chain_exec(a, res); } @@ -931,7 +933,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, module_put(a_o->owner); if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) && - !a->goto_chain) { + !rcu_access_pointer(a->goto_chain)) { tcf_action_destroy_1(a, bind); NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain"); return ERR_PTR(-EINVAL); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index dc10525e90e7..99ae30c177c7 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -367,7 +367,7 @@ static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block) struct tcf_block *block = chain->block; mutex_destroy(&chain->filter_chain_lock); - kfree(chain); + kfree_rcu(chain, rcu); if (free_block) tcf_block_destroy(block); } -- cgit From 6b70fc94afd165342876e53fc4b2f7d085009945 Mon Sep 17 00:00:00 2001 From: Wang Hai Date: Wed, 20 Mar 2019 14:25:05 -0400 Subject: net-sysfs: Fix memory leak in netdev_register_kobject When registering struct net_device, it will call register_netdevice -> netdev_register_kobject -> device_initialize(dev); dev_set_name(dev, "%s", ndev->name) device_add(dev) register_queue_kobjects(ndev) In netdev_register_kobject(), if device_add(dev) or register_queue_kobjects(ndev) failed. Register_netdevice() will return error, causing netdev_freemem(ndev) to be called to free net_device, however put_device(&dev->dev)->..-> kobject_cleanup() won't be called, resulting in a memory leak. syzkaller report this: BUG: memory leak unreferenced object 0xffff8881f4fad168 (size 8): comm "syz-executor.0", pid 3575, jiffies 4294778002 (age 20.134s) hex dump (first 8 bytes): 77 70 61 6e 30 00 ff ff wpan0... backtrace: [<000000006d2d91d7>] kstrdup_const+0x3d/0x50 mm/util.c:73 [<00000000ba9ff953>] kvasprintf_const+0x112/0x170 lib/kasprintf.c:48 [<000000005555ec09>] kobject_set_name_vargs+0x55/0x130 lib/kobject.c:281 [<0000000098d28ec3>] dev_set_name+0xbb/0xf0 drivers/base/core.c:1915 [<00000000b7553017>] netdev_register_kobject+0xc0/0x410 net/core/net-sysfs.c:1727 [<00000000c826a797>] register_netdevice+0xa51/0xeb0 net/core/dev.c:8711 [<00000000857bfcfd>] cfg802154_update_iface_num.isra.2+0x13/0x90 [ieee802154] [<000000003126e453>] ieee802154_llsec_fill_key_id+0x1d5/0x570 [ieee802154] [<00000000e4b3df51>] 0xffffffffc1500e0e [<00000000b4319776>] platform_drv_probe+0xc6/0x180 drivers/base/platform.c:614 [<0000000037669347>] really_probe+0x491/0x7c0 drivers/base/dd.c:509 [<000000008fed8862>] driver_probe_device+0xdc/0x240 drivers/base/dd.c:671 [<00000000baf52041>] device_driver_attach+0xf2/0x130 drivers/base/dd.c:945 [<00000000c7cc8dec>] __driver_attach+0x10e/0x210 drivers/base/dd.c:1022 [<0000000057a757c2>] bus_for_each_dev+0x154/0x1e0 drivers/base/bus.c:304 [<000000005f5ae04b>] bus_add_driver+0x427/0x5e0 drivers/base/bus.c:645 Reported-by: Hulk Robot Fixes: 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array") Signed-off-by: Wang Hai Reviewed-by: Andy Shevchenko Reviewed-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 8f8b7b6c2945..f8f94303a1f5 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1747,16 +1747,20 @@ int netdev_register_kobject(struct net_device *ndev) error = device_add(dev); if (error) - return error; + goto error_put_device; error = register_queue_kobjects(ndev); - if (error) { - device_del(dev); - return error; - } + if (error) + goto error_device_del; pm_runtime_set_memalloc_noio(dev, true); + return 0; + +error_device_del: + device_del(dev); +error_put_device: + put_device(dev); return error; } -- cgit From 408f13ef358aa5ad56dc6230c2c7deb92cf462b1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 21 Mar 2019 09:39:52 +0800 Subject: rhashtable: Still do rehash when we get EEXIST As it stands if a shrink is delayed because of an outstanding rehash, we will go into a rescheduling loop without ever doing the rehash. This patch fixes this by still carrying out the rehash and then rescheduling so that we can shrink after the completion of the rehash should it still be necessary. The return value of EEXIST captures this case and other cases (e.g., another thread expanded/rehashed the table at the same time) where we should still proceed with the rehash. Fixes: da20420f83ea ("rhashtable: Add nested tables") Reported-by: Josh Elsasser Signed-off-by: Herbert Xu Tested-by: Josh Elsasser Signed-off-by: David S. Miller --- lib/rhashtable.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 0a105d4af166..97f59abc3e92 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -416,8 +416,12 @@ static void rht_deferred_worker(struct work_struct *work) else if (tbl->nest) err = rhashtable_rehash_alloc(ht, tbl, tbl->size); - if (!err) - err = rhashtable_rehash_table(ht); + if (!err || err == -EEXIST) { + int nerr; + + nerr = rhashtable_rehash_table(ht); + err = err ?: nerr; + } mutex_unlock(&ht->mutex); -- cgit From 5f543a54eec08228ab0cc0a49cf5d79dd32c9e5e Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Thu, 21 Mar 2019 11:28:43 +0800 Subject: net: hns3: fix for not calculating tx bd num correctly When there is only one byte in a frag, the current calculation using "(size + HNS3_MAX_BD_SIZE - 1) >> HNS3_MAX_BD_SIZE_OFFSET" will return zero, because HNS3_MAX_BD_SIZE is 65535 and HNS3_MAX_BD_SIZE_OFFSET is 16. So it will cause tx error when a frag's size is one byte. This patch fixes it by using DIV_ROUND_UP. Fixes: 3fe13ed95dd3 ("net: hns3: avoid mult + div op in critical data path") Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 13 ++++++------- drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 1 - 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index 1c1f17ec6be2..162cb9afa0e7 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -22,6 +22,7 @@ #include "hns3_enet.h" #define hns3_set_field(origin, shift, val) ((origin) |= ((val) << (shift))) +#define hns3_tx_bd_count(S) DIV_ROUND_UP(S, HNS3_MAX_BD_SIZE) static void hns3_clear_all_ring(struct hnae3_handle *h); static void hns3_force_clear_all_rx_ring(struct hnae3_handle *h); @@ -1079,7 +1080,7 @@ static int hns3_fill_desc(struct hns3_enet_ring *ring, void *priv, desc_cb->length = size; - frag_buf_num = (size + HNS3_MAX_BD_SIZE - 1) >> HNS3_MAX_BD_SIZE_OFFSET; + frag_buf_num = hns3_tx_bd_count(size); sizeoflast = size & HNS3_TX_LAST_SIZE_M; sizeoflast = sizeoflast ? sizeoflast : HNS3_MAX_BD_SIZE; @@ -1124,14 +1125,13 @@ static int hns3_nic_maybe_stop_tso(struct sk_buff **out_skb, int *bnum, int i; size = skb_headlen(skb); - buf_num = (size + HNS3_MAX_BD_SIZE - 1) >> HNS3_MAX_BD_SIZE_OFFSET; + buf_num = hns3_tx_bd_count(size); frag_num = skb_shinfo(skb)->nr_frags; for (i = 0; i < frag_num; i++) { frag = &skb_shinfo(skb)->frags[i]; size = skb_frag_size(frag); - bdnum_for_frag = (size + HNS3_MAX_BD_SIZE - 1) >> - HNS3_MAX_BD_SIZE_OFFSET; + bdnum_for_frag = hns3_tx_bd_count(size); if (unlikely(bdnum_for_frag > HNS3_MAX_BD_PER_FRAG)) return -ENOMEM; @@ -1139,8 +1139,7 @@ static int hns3_nic_maybe_stop_tso(struct sk_buff **out_skb, int *bnum, } if (unlikely(buf_num > HNS3_MAX_BD_PER_FRAG)) { - buf_num = (skb->len + HNS3_MAX_BD_SIZE - 1) >> - HNS3_MAX_BD_SIZE_OFFSET; + buf_num = hns3_tx_bd_count(skb->len); if (ring_space(ring) < buf_num) return -EBUSY; /* manual split the send packet */ @@ -1169,7 +1168,7 @@ static int hns3_nic_maybe_stop_tx(struct sk_buff **out_skb, int *bnum, buf_num = skb_shinfo(skb)->nr_frags + 1; if (unlikely(buf_num > HNS3_MAX_BD_PER_FRAG)) { - buf_num = (skb->len + HNS3_MAX_BD_SIZE - 1) / HNS3_MAX_BD_SIZE; + buf_num = hns3_tx_bd_count(skb->len); if (ring_space(ring) < buf_num) return -EBUSY; /* manual split the send packet */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index 1db0bd41d209..75669cd0c311 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -193,7 +193,6 @@ enum hns3_nic_state { #define HNS3_VECTOR_INITED 1 #define HNS3_MAX_BD_SIZE 65535 -#define HNS3_MAX_BD_SIZE_OFFSET 16 #define HNS3_MAX_BD_PER_FRAG 8 #define HNS3_MAX_BD_PER_PKT MAX_SKB_FRAGS -- cgit From 2219c9ee922d9a0548b44fb82342166d9d9fdbc9 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 21 Feb 2019 03:38:51 +0000 Subject: drm/nouveau/dmem: remove set but not used variable 'drm' Fixes gcc '-Wunused-but-set-variable' warning: drivers/gpu/drm/nouveau/nouveau_dmem.c: In function 'nouveau_dmem_free': drivers/gpu/drm/nouveau/nouveau_dmem.c:103:22: warning: variable 'drm' set but not used [-Wunused-but-set-variable] struct nouveau_drm *drm; ^ Signed-off-by: YueHaibing Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 8be7a83ced9b..58b4cb389d87 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -100,12 +100,10 @@ static void nouveau_dmem_free(struct hmm_devmem *devmem, struct page *page) { struct nouveau_dmem_chunk *chunk; - struct nouveau_drm *drm; unsigned long idx; chunk = (void *)hmm_devmem_page_get_drvdata(page); idx = page_to_pfn(page) - chunk->pfn_first; - drm = chunk->drm; /* * FIXME: -- cgit From 18ec3c129bcad0a481ee7faf8ce5ad92d6537722 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 22 Feb 2019 09:34:04 +0300 Subject: drm/nouveau/dmem: Fix a NULL vs IS_ERR() check The hmm_devmem_add() function doesn't return NULL, it returns error pointers. Fixes: 5be73b690875 ("drm/nouveau/dmem: device memory helpers for SVM") Signed-off-by: Dan Carpenter Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 58b4cb389d87..b1df42f73ec2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -621,7 +621,7 @@ nouveau_dmem_init(struct nouveau_drm *drm) */ drm->dmem->devmem = hmm_devmem_add(&nouveau_dmem_devmem_ops, device, size); - if (drm->dmem->devmem == NULL) { + if (IS_ERR(drm->dmem->devmem)) { kfree(drm->dmem); drm->dmem = NULL; return; -- cgit From 909e9c9c428376e2a43d178ed4b0a2d5ba9cb7d3 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 28 Feb 2019 20:24:59 +0800 Subject: drm/nouveau/debugfs: Fix check of pm_runtime_get_sync failure pm_runtime_get_sync returns negative on failure. Fixes: eaeb9010bb4b ("drm/nouveau/debugfs: Wake up GPU before doing any reclocking") Signed-off-by: YueHaibing Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c index 88a52f6b39fe..7dfbbbc1beea 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c @@ -181,7 +181,7 @@ nouveau_debugfs_pstate_set(struct file *file, const char __user *ubuf, } ret = pm_runtime_get_sync(drm->dev); - if (IS_ERR_VALUE(ret) && ret != -EACCES) + if (ret < 0 && ret != -EACCES) return ret; ret = nvif_mthd(ctrl, NVIF_CONTROL_PSTATE_USER, &args, sizeof(args)); pm_runtime_put_autosuspend(drm->dev); -- cgit From 83857418073fda5e0d562e6639b45ae06c957131 Mon Sep 17 00:00:00 2001 From: Jérôme Glisse Date: Thu, 21 Mar 2019 15:42:18 -0400 Subject: drm/nouveau/dmem: empty chunk do not have a buffer object associated with them. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Empty chunk do not have a bo associated with them so no need to pin/unpin on suspend/resume. This fix suspend/resume on 5.1rc1 when NOUVEAU_SVM is enabled. Signed-off-by: Jérôme Glisse Reviewed-by: Tobias Klausmann Tested-by: Tobias Klausmann Cc: Ben Skeggs Cc: dri-devel@lists.freedesktop.org Cc: nouveau@lists.freedesktop.org Cc: David Airlie Cc: Daniel Vetter Signed-off-by: Ben Skeggs --- drivers/gpu/drm/nouveau/nouveau_dmem.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index b1df42f73ec2..61fc27d30ff2 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -454,11 +454,6 @@ nouveau_dmem_resume(struct nouveau_drm *drm) /* FIXME handle pin failure */ WARN_ON(ret); } - list_for_each_entry (chunk, &drm->dmem->chunk_empty, list) { - ret = nouveau_bo_pin(chunk->bo, TTM_PL_FLAG_VRAM, false); - /* FIXME handle pin failure */ - WARN_ON(ret); - } mutex_unlock(&drm->dmem->mutex); } @@ -477,9 +472,6 @@ nouveau_dmem_suspend(struct nouveau_drm *drm) list_for_each_entry (chunk, &drm->dmem->chunk_full, list) { nouveau_bo_unpin(chunk->bo); } - list_for_each_entry (chunk, &drm->dmem->chunk_empty, list) { - nouveau_bo_unpin(chunk->bo); - } mutex_unlock(&drm->dmem->mutex); } -- cgit From 41b37f4c0fa67185691bcbd30201cad566f2f0d1 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Tue, 19 Mar 2019 01:30:09 +0900 Subject: ARM: dts: imx6qdl: Fix typo in imx6qdl-icore-rqs.dtsi This patch fixes a spelling typo. Signed-off-by: Masanari Iida Fixes: cc42603de320 ("ARM: dts: imx6q-icore-rqs: Add Engicam IMX6 Q7 initial support") Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6qdl-icore-rqs.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx6qdl-icore-rqs.dtsi b/arch/arm/boot/dts/imx6qdl-icore-rqs.dtsi index 1d1b4bd0670f..a4217f564a53 100644 --- a/arch/arm/boot/dts/imx6qdl-icore-rqs.dtsi +++ b/arch/arm/boot/dts/imx6qdl-icore-rqs.dtsi @@ -264,7 +264,7 @@ pinctrl-2 = <&pinctrl_usdhc3_200mhz>; vmcc-supply = <®_sd3_vmmc>; cd-gpios = <&gpio1 1 GPIO_ACTIVE_LOW>; - bus-witdh = <4>; + bus-width = <4>; no-1-8-v; status = "okay"; }; @@ -275,7 +275,7 @@ pinctrl-1 = <&pinctrl_usdhc4_100mhz>; pinctrl-2 = <&pinctrl_usdhc4_200mhz>; vmcc-supply = <®_sd4_vmmc>; - bus-witdh = <8>; + bus-width = <8>; no-1-8-v; non-removable; status = "okay"; -- cgit From 15b43e497ffd80ca44c00d781869a0e71f223ea5 Mon Sep 17 00:00:00 2001 From: Michal Vokáč Date: Wed, 20 Mar 2019 12:09:05 +0100 Subject: ARM: dts: imx6dl-yapp4: Use correct pseudo PHY address for the switch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The switch is accessible through pseudo PHY which is located at 0x10. Signed-off-by: Michal Vokáč Fixes: 87489ec3a77f ("ARM: dts: imx: Add Y Soft IOTA Draco, Hydra and Ursa boards") Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx6dl-yapp4-common.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi index 091d829f6b05..e8d800fec637 100644 --- a/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi +++ b/arch/arm/boot/dts/imx6dl-yapp4-common.dtsi @@ -114,9 +114,9 @@ reg = <2>; }; - switch@0 { + switch@10 { compatible = "qca,qca8334"; - reg = <0>; + reg = <10>; switch_ports: ports { #address-cells = <1>; -- cgit From 728e096dd70889c2e80dd4153feee91afb1daf72 Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Thu, 10 Jan 2019 21:19:33 +0100 Subject: ARM: imx_v6_v7_defconfig: continue compiling the pwm driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the pwm-imx driver was split into two drivers and the Kconfig symbol changed accordingly, use the new name to continue being able to use the PWM hardware. Signed-off-by: Uwe Kleine-König Signed-off-by: Shawn Guo --- arch/arm/configs/imx_v6_v7_defconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig index 5586a5074a96..50fb01d70b10 100644 --- a/arch/arm/configs/imx_v6_v7_defconfig +++ b/arch/arm/configs/imx_v6_v7_defconfig @@ -398,7 +398,7 @@ CONFIG_MAG3110=y CONFIG_MPL3115=y CONFIG_PWM=y CONFIG_PWM_FSL_FTM=y -CONFIG_PWM_IMX=y +CONFIG_PWM_IMX27=y CONFIG_NVMEM_IMX_OCOTP=y CONFIG_NVMEM_VF610_OCOTP=y CONFIG_TEE=y -- cgit From 507aaeeef80d70c46bdf07cda49234b36c2bbdcb Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Thu, 10 Jan 2019 21:19:34 +0100 Subject: ARM: imx_v4_v5_defconfig: enable PWM driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While there is no mainline board that makes use of the PWM still enable the driver for it to increase compile test coverage. Signed-off-by: Uwe Kleine-König Signed-off-by: Shawn Guo --- arch/arm/configs/imx_v4_v5_defconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/configs/imx_v4_v5_defconfig b/arch/arm/configs/imx_v4_v5_defconfig index 8661dd9b064a..b37f8e675e40 100644 --- a/arch/arm/configs/imx_v4_v5_defconfig +++ b/arch/arm/configs/imx_v4_v5_defconfig @@ -170,6 +170,9 @@ CONFIG_IMX_SDMA=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_IIO=y CONFIG_FSL_MX25_ADC=y +CONFIG_PWM=y +CONFIG_PWM_IMX1=y +CONFIG_PWM_IMX27=y CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_VFAT_FS=y -- cgit From 83d163124cf1104cca5b668d5fe6325715a60855 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 21 Mar 2019 14:34:36 -0700 Subject: bpf: verifier: propagate liveness on all frames Commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences") connected up parentage chains of all frames of the stack. It didn't, however, ensure propagate_liveness() propagates all liveness information along those chains. This means pruning happening in the callee may generate explored states with incomplete liveness for the chains in lower frames of the stack. The included selftest is similar to the prior one from commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences"), where callee would prune regardless of the difference in r8 state. Now we also initialize r9 to 0 or 1 based on a result from get_random(). r9 is never read so the walk with r9 = 0 gets pruned (correctly) after the walk with r9 = 1 completes. The selftest is so arranged that the pruning will happen in the callee. Since callee does not propagate read marks of r8, the explored state at the pruning point prior to the callee will now ignore r8. Propagate liveness on all frames of the stack when pruning. Fixes: f4d7e40a5b71 ("bpf: introduce function calls (verification)") Signed-off-by: Jakub Kicinski Signed-off-by: Alexei Starovoitov --- kernel/bpf/verifier.c | 20 +++++++++++--------- tools/testing/selftests/bpf/verifier/calls.c | 25 +++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f19d5e04c69d..fd502c1f71eb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -6078,15 +6078,17 @@ static int propagate_liveness(struct bpf_verifier_env *env, } /* Propagate read liveness of registers... */ BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG); - /* We don't need to worry about FP liveness because it's read-only */ - for (i = 0; i < BPF_REG_FP; i++) { - if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ) - continue; - if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) { - err = mark_reg_read(env, &vstate->frame[vstate->curframe]->regs[i], - &vparent->frame[vstate->curframe]->regs[i]); - if (err) - return err; + for (frame = 0; frame <= vstate->curframe; frame++) { + /* We don't need to worry about FP liveness, it's read-only */ + for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) { + if (vparent->frame[frame]->regs[i].live & REG_LIVE_READ) + continue; + if (vstate->frame[frame]->regs[i].live & REG_LIVE_READ) { + err = mark_reg_read(env, &vstate->frame[frame]->regs[i], + &vparent->frame[frame]->regs[i]); + if (err) + return err; + } } } diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 4004891afa9c..f2ccae39ee66 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -1940,3 +1940,28 @@ .errstr = "!read_ok", .result = REJECT, }, +{ + "calls: cross frame pruning - liveness propagation", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_MOV64_IMM(BPF_REG_8, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_8, 1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_MOV64_IMM(BPF_REG_9, 0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_9, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_8, 1, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, + .errstr_unpriv = "function calls to other bpf functions are allowed for root only", + .errstr = "!read_ok", + .result = REJECT, +}, -- cgit From e1037354a0a75acdea2b27043c0a371ed85cf262 Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Fri, 22 Mar 2019 11:37:18 +0800 Subject: ALSA: hda/realtek: Enable ASUS X441MB and X705FD headset MIC with ALC256 The ASUS laptop X441MB and X705FD with ALC256 cannot detect the headset MIC until ALC256_FIXUP_ASUS_MIC_NO_PRESENCE quirk applied. Signed-off-by: Chris Chiu Signed-off-by: Daniel Drake Signed-off-by: Jian-Hong Pan Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 6042ddf2b2ae..f2539007d09e 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5688,6 +5688,7 @@ enum { ALC225_FIXUP_WYSE_AUTO_MUTE, ALC225_FIXUP_WYSE_DISABLE_MIC_VREF, ALC286_FIXUP_ACER_AIO_HEADSET_MIC, + ALC256_FIXUP_ASUS_MIC_NO_PRESENCE, }; static const struct hda_fixup alc269_fixups[] = { @@ -6696,6 +6697,15 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE }, + [ALC256_FIXUP_ASUS_MIC_NO_PRESENCE] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x19, 0x04a11120 }, /* use as headset mic, without its own jack detect */ + { } + }, + .chained = true, + .chain_id = ALC256_FIXUP_ASUS_HEADSET_MODE + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -7334,6 +7344,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x14, 0x90170110}, {0x1b, 0x90a70130}, {0x21, 0x03211020}), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE, + {0x1a, 0x90a70130}, + {0x1b, 0x90170110}, + {0x21, 0x03211020}), SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB, {0x12, 0xb7a60130}, {0x13, 0xb8a61140}, -- cgit From a806ef1cf3bbc0baadc6cdeb11f12b5dd27e91c2 Mon Sep 17 00:00:00 2001 From: Chris Chiu Date: Fri, 22 Mar 2019 11:37:20 +0800 Subject: ALSA: hda/realtek: Enable headset mic of ASUS P5440FF with ALC256 The ASUS laptop P5440FF with ALC256 can't detect the headset microphone until ALC256_FIXUP_ASUS_MIC_NO_PRESENCE quirk applied. Signed-off-by: Chris Chiu Signed-off-by: Daniel Drake Signed-off-by: Jian-Hong Pan Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index f2539007d09e..8ec0cd95341a 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7344,6 +7344,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x14, 0x90170110}, {0x1b, 0x90a70130}, {0x21, 0x03211020}), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE, + {0x12, 0x90a60130}, + {0x14, 0x90170110}, + {0x21, 0x03211020}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE, {0x1a, 0x90a70130}, {0x1b, 0x90170110}, -- cgit From 6ac371aa1a74240fb910c98aa3484d5ece8473d3 Mon Sep 17 00:00:00 2001 From: Jian-Hong Pan Date: Fri, 22 Mar 2019 11:37:22 +0800 Subject: ALSA: hda/realtek: Enable headset MIC of ASUS X430UN and X512DK with ALC256 The ASUS X430UN and X512DK with ALC256 cannot detect the headset MIC until ALC256_FIXUP_ASUS_MIC_NO_PRESENCE quirk applied. Signed-off-by: Jian-Hong Pan Signed-off-by: Daniel Drake Cc: Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 8ec0cd95341a..01c71467600b 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -7348,6 +7348,10 @@ static const struct snd_hda_pin_quirk alc269_pin_fixup_tbl[] = { {0x12, 0x90a60130}, {0x14, 0x90170110}, {0x21, 0x03211020}), + SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE, + {0x12, 0x90a60130}, + {0x14, 0x90170110}, + {0x21, 0x04211020}), SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC256_FIXUP_ASUS_MIC_NO_PRESENCE, {0x1a, 0x90a70130}, {0x1b, 0x90170110}, -- cgit From 80ef4464d5e27408685e609d389663aad46644b9 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 20 Mar 2019 18:57:23 +0000 Subject: iommu/iova: Fix tracking of recently failed iova address If a 32 bit allocation request is too big to possibly succeed, it early exits with a failure and then should never update max32_alloc_ size. This patch fixes current code, now the size is only updated if the slow path failed while walking the tree. Without the fix the allocation may enter the slow path again even if there was a failure before of a request with the same or a smaller size. Cc: # 4.20+ Fixes: bee60e94a1e2 ("iommu/iova: Optimise attempts to allocate iova from 32bit address range") Reviewed-by: Robin Murphy Signed-off-by: Robert Richter Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index f8d3ba247523..2de8122e218f 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -207,8 +207,10 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, curr_iova = rb_entry(curr, struct iova, node); } while (curr && new_pfn <= curr_iova->pfn_hi); - if (limit_pfn < size || new_pfn < iovad->start_pfn) + if (limit_pfn < size || new_pfn < iovad->start_pfn) { + iovad->max32_alloc_size = size; goto iova32_full; + } /* pfn_lo will point to size aligned address if size_aligned is set */ new->pfn_lo = new_pfn; @@ -222,7 +224,6 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, return 0; iova32_full: - iovad->max32_alloc_size = size; spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); return -ENOMEM; } -- cgit From 5bb71fc790a88d063507dc5d445ab8b14e845591 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 20 Mar 2019 09:58:33 +0800 Subject: iommu/vt-d: Check capability before disabling protected memory The spec states in 10.4.16 that the Protected Memory Enable Register should be treated as read-only for implementations not supporting protected memory regions (PLMR and PHMR fields reported as Clear in the Capability register). Cc: Jacob Pan Cc: mark gross Suggested-by: Ashok Raj Fixes: f8bab73515ca5 ("intel-iommu: PMEN support") Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 87274b54febd..f002d47d2f27 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1538,6 +1538,9 @@ static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) u32 pmen; unsigned long flags; + if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap)) + return; + raw_spin_lock_irqsave(&iommu->register_lock, flags); pmen = readl(iommu->reg + DMAR_PMEN_REG); pmen &= ~DMA_PMEN_EPM; -- cgit From 84c11e4df5aa4955acaa441f0cf1cb2e50daf64b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 20 Mar 2019 09:58:34 +0800 Subject: iommu/vt-d: Save the right domain ID used by hardware The driver sets a default domain id (FLPT_DEFAULT_DID) in the first level only pasid entry, but saves a different domain id in @sdev->did. The value saved in @sdev->did will be used to invalidate the translation caches. Hence, the driver might result in invalidating the caches with a wrong domain id. Cc: Ashok Raj Cc: Jacob Pan Fixes: 1c4f88b7f1f92 ("iommu/vt-d: Shared virtual address in scalable mode") Signed-off-by: Liu Yi L Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index f002d47d2f27..28cb713d728c 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5335,7 +5335,7 @@ int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sd ctx_lo = context[0].lo; - sdev->did = domain->iommu_did[iommu->seq_id]; + sdev->did = FLPT_DEFAULT_DID; sdev->sid = PCI_DEVID(info->bus, info->devfn); if (!(ctx_lo & CONTEXT_PASIDE)) { -- cgit From 5a07168d8d89b00fe1760120714378175b3ef992 Mon Sep 17 00:00:00 2001 From: Chen Jie Date: Fri, 15 Mar 2019 03:44:38 +0000 Subject: futex: Ensure that futex address is aligned in handle_futex_death() The futex code requires that the user space addresses of futexes are 32bit aligned. sys_futex() checks this in futex_get_keys() but the robust list code has no alignment check in place. As a consequence the kernel crashes on architectures with strict alignment requirements in handle_futex_death() when trying to cmpxchg() on an unaligned futex address which was retrieved from the robust list. [ tglx: Rewrote changelog, proper sizeof() based alignement check and add comment ] Fixes: 0771dfefc9e5 ("[PATCH] lightweight robust futexes: core") Signed-off-by: Chen Jie Signed-off-by: Thomas Gleixner Cc: Cc: Cc: Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1552621478-119787-1-git-send-email-chenjie6@huawei.com --- kernel/futex.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/futex.c b/kernel/futex.c index c3b73b0311bc..9e40cf7be606 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -3436,6 +3436,10 @@ static int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int p { u32 uval, uninitialized_var(nval), mval; + /* Futex address must be 32bit aligned */ + if ((((unsigned long)uaddr) % sizeof(*uaddr)) != 0) + return -1; + retry: if (get_user(uval, uaddr)) return -1; -- cgit From 4fe64a62e04cfb2dc1daab0d8f05d212aa014161 Mon Sep 17 00:00:00 2001 From: Valdis Kletnieks Date: Tue, 12 Mar 2019 03:47:53 -0400 Subject: x86/mm/pti: Make local symbols static With 'make C=2 W=1', sparse and gcc both complain: CHECK arch/x86/mm/pti.c arch/x86/mm/pti.c:84:3: warning: symbol 'pti_mode' was not declared. Should it be static? arch/x86/mm/pti.c:605:6: warning: symbol 'pti_set_kernel_image_nonglobal' was not declared. Should it be static? CC arch/x86/mm/pti.o arch/x86/mm/pti.c:605:6: warning: no previous prototype for 'pti_set_kernel_image_nonglobal' [-Wmissing-prototypes] 605 | void pti_set_kernel_image_nonglobal(void) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pti_set_kernel_image_nonglobal() is only used locally. 'pti_mode' exists in drivers/hwtracing/intel_th/pti.c as well, but it's a completely unrelated local (static) symbol. Make both static. Signed-off-by: Valdis Kletnieks Signed-off-by: Thomas Gleixner Cc: Dave Hansen Cc: Andy Lutomirski Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/27680.1552376873@turing-police --- arch/x86/mm/pti.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 4fee5c3003ed..139b28a01ce4 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -77,7 +77,7 @@ static void __init pti_print_if_secure(const char *reason) pr_info("%s\n", reason); } -enum pti_mode { +static enum pti_mode { PTI_AUTO = 0, PTI_FORCE_OFF, PTI_FORCE_ON @@ -602,7 +602,7 @@ static void pti_clone_kernel_text(void) set_memory_global(start, (end_global - start) >> PAGE_SHIFT); } -void pti_set_kernel_image_nonglobal(void) +static void pti_set_kernel_image_nonglobal(void) { /* * The identity map is created with PMDs, regardless of the -- cgit From bb2e320565f997273fe04035bb6c17f643da6f8a Mon Sep 17 00:00:00 2001 From: Valdis Kletnieks Date: Tue, 12 Mar 2019 04:17:56 -0400 Subject: genirq/devres: Remove excess parameter from kernel doc Building with 'make W=1' complains: CC kernel/irq/devres.o kernel/irq/devres.c:104: warning: Excess function parameter 'thread_fn' description in 'devm_request_any_context_irq' Remove it. Signed-off-by: Valdis Kletnieks Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/31207.1552378676@turing-police --- kernel/irq/devres.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index 5d5378ea0afe..f808c6a97dcc 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c @@ -84,8 +84,6 @@ EXPORT_SYMBOL(devm_request_threaded_irq); * @dev: device to request interrupt for * @irq: Interrupt line to allocate * @handler: Function to be called when the IRQ occurs - * @thread_fn: function to be called in a threaded interrupt context. NULL - * for devices which handle everything in @handler * @irqflags: Interrupt type flags * @devname: An ascii name for the claiming device, dev_name(dev) if NULL * @dev_id: A cookie passed back to the handler function -- cgit From e8750053d64a3317cbc15f8341f0f11ca751bfeb Mon Sep 17 00:00:00 2001 From: Valdis Kletnieks Date: Tue, 12 Mar 2019 04:38:35 -0400 Subject: time/jiffies: Make refined_jiffies static sparse complains: CHECK kernel/time/jiffies.c kernel/time/jiffies.c:92:20: warning: symbol 'refined_jiffies' was not declared. Should it be static? Its only used in file scope. Make it static. Signed-off-by: Valdis Kletnieks Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/32342.1552379915@turing-police --- kernel/time/jiffies.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index dc1b6f1929f9..ac9c03dd6c7d 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -89,7 +89,7 @@ struct clocksource * __init __weak clocksource_default_clock(void) return &clocksource_jiffies; } -struct clocksource refined_jiffies; +static struct clocksource refined_jiffies; int register_refined_jiffies(long cycles_per_second) { -- cgit From 48084abf212052ca1d39fae064c581b1ce5b1fdf Mon Sep 17 00:00:00 2001 From: Valdis Kletnieks Date: Tue, 12 Mar 2019 05:33:48 -0400 Subject: watchdog/core: Make variables static sparse complains: CHECK kernel/watchdog.c kernel/watchdog.c:45:19: warning: symbol 'nmi_watchdog_available' was not declared. Should it be static? kernel/watchdog.c:47:16: warning: symbol 'watchdog_allowed_mask' was not declared. Should it be static? They're not referenced by name from anyplace else, make them static. Signed-off-by: Valdis Kletnieks Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/7855.1552383228@turing-police --- kernel/watchdog.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 8fbfda94a67b..403c9bd90413 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -42,9 +42,9 @@ int __read_mostly watchdog_user_enabled = 1; int __read_mostly nmi_watchdog_user_enabled = NMI_WATCHDOG_DEFAULT; int __read_mostly soft_watchdog_user_enabled = 1; int __read_mostly watchdog_thresh = 10; -int __read_mostly nmi_watchdog_available; +static int __read_mostly nmi_watchdog_available; -struct cpumask watchdog_allowed_mask __read_mostly; +static struct cpumask watchdog_allowed_mask __read_mostly; struct cpumask watchdog_cpumask __read_mostly; unsigned long *watchdog_cpumask_bits = cpumask_bits(&watchdog_cpumask); -- cgit From 7cf77b273a8fc51e7de622fa6691abd4436a9a6b Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 11 Feb 2019 11:51:20 +0100 Subject: drm/tegra: hub: Fix dereference before check Reported-by: Dan Carpenter Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/hub.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c index ba9b3cfb8c3d..b3436c2aed68 100644 --- a/drivers/gpu/drm/tegra/hub.c +++ b/drivers/gpu/drm/tegra/hub.c @@ -378,14 +378,16 @@ static int tegra_shared_plane_atomic_check(struct drm_plane *plane, static void tegra_shared_plane_atomic_disable(struct drm_plane *plane, struct drm_plane_state *old_state) { - struct tegra_dc *dc = to_tegra_dc(old_state->crtc); struct tegra_plane *p = to_tegra_plane(plane); + struct tegra_dc *dc; u32 value; /* rien ne va plus */ if (!old_state || !old_state->crtc) return; + dc = to_tegra_dc(old_state->crtc); + /* * XXX Legacy helpers seem to sometimes call ->atomic_disable() even * on planes that are already disabled. Make sure we fallback to the -- cgit From 509869a2fec36ecb2b841180915995f41d5a0219 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Mon, 18 Feb 2019 12:00:50 +0100 Subject: drm/tegra: vic: Fix implicit function declaration warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When CONFIG_IOMMU_API isn't set the following warnings pops up: drivers/gpu/drm/tegra/vic.c: In function ‘vic_boot’: drivers/gpu/drm/tegra/vic.c:110:31: error: implicit declaration of function ‘dev_iommu_fwspec_get’; did you mean ‘iommu_fwspec_free’? [-Werror=implicit-function-declaration] struct iommu_fwspec *spec = dev_iommu_fwspec_get(vic->dev); ^~~~~~~~~~~~~~~~~~~~ iommu_fwspec_free drivers/gpu/drm/tegra/vic.c:110:31: warning: initialization of ‘struct iommu_fwspec *’ from ‘int’ makes pointer from integer without a cast [-Wint-conversion] drivers/gpu/drm/tegra/vic.c:117:19: error: ‘struct iommu_fwspec’ has no member named ‘num_ids’ if (spec && spec->num_ids > 0) { ^~ drivers/gpu/drm/tegra/vic.c:118:16: error: ‘struct iommu_fwspec’ has no member named ‘ids’ value = spec->ids[0] & 0xffff; ^~ Rework so that its inside a '#ifdef CONFIG_IOMMU_API' block. Fixes: f3779cb190a5 ("drm/tegra: vic: Support stream ID register programming") Signed-off-by: Anders Roxell Signed-off-by: Thierry Reding --- drivers/gpu/drm/tegra/vic.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c index 39bfed9623de..982ce37ecde1 100644 --- a/drivers/gpu/drm/tegra/vic.c +++ b/drivers/gpu/drm/tegra/vic.c @@ -106,6 +106,7 @@ static int vic_boot(struct vic *vic) if (vic->booted) return 0; +#ifdef CONFIG_IOMMU_API if (vic->config->supports_sid) { struct iommu_fwspec *spec = dev_iommu_fwspec_get(vic->dev); u32 value; @@ -121,6 +122,7 @@ static int vic_boot(struct vic *vic) vic_writel(vic, value, VIC_THI_STREAMID1); } } +#endif /* setup clockgating registers */ vic_writel(vic, CG_IDLE_CG_DLY_CNT(4) | -- cgit From c8248c6c1a3d5db944753dd8e1c143d92c2c74fc Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Thu, 21 Mar 2019 21:23:14 +0100 Subject: r8169: don't read interrupt mask register in interrupt handler After the original patch network starts to crash on heavy load. It's not fully clear why this additional register read has such side effects, but removing it fixes the issue. Thanks also to Alex for his contribution and hints. [0] https://marc.info/?t=155268170400002&r=1&w=2 Fixes: e782410ed237 ("r8169: improve spurious interrupt detection") Reported-by: VDR User Tested-by: VDR User Signed-off-by: Heiner Kallweit Reviewed-by: Alexander Duyck Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index c29dde064078..9dd1cd2c0c68 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -678,6 +678,7 @@ struct rtl8169_private { struct work_struct work; } wk; + unsigned irq_enabled:1; unsigned supports_gmii:1; dma_addr_t counters_phys_addr; struct rtl8169_counters *counters; @@ -1293,6 +1294,7 @@ static void rtl_ack_events(struct rtl8169_private *tp, u16 bits) static void rtl_irq_disable(struct rtl8169_private *tp) { RTL_W16(tp, IntrMask, 0); + tp->irq_enabled = 0; } #define RTL_EVENT_NAPI_RX (RxOK | RxErr) @@ -1301,6 +1303,7 @@ static void rtl_irq_disable(struct rtl8169_private *tp) static void rtl_irq_enable(struct rtl8169_private *tp) { + tp->irq_enabled = 1; RTL_W16(tp, IntrMask, tp->irq_mask); } @@ -6520,9 +6523,8 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) { struct rtl8169_private *tp = dev_instance; u16 status = RTL_R16(tp, IntrStatus); - u16 irq_mask = RTL_R16(tp, IntrMask); - if (status == 0xffff || !(status & irq_mask)) + if (!tp->irq_enabled || status == 0xffff || !(status & tp->irq_mask)) return IRQ_NONE; if (unlikely(status & SYSErr)) { -- cgit From ca0214ee2802dd47239a4e39fb21c5b00ef61b22 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 22 Mar 2019 16:00:54 +0100 Subject: ALSA: pcm: Fix possible OOB access in PCM oss plugins The PCM OSS emulation converts and transfers the data on the fly via "plugins". The data is converted over the dynamically allocated buffer for each plugin, and recently syzkaller caught OOB in this flow. Although the bisection by syzbot pointed out to the commit 65766ee0bf7f ("ALSA: oss: Use kvzalloc() for local buffer allocations"), this is merely a commit to replace vmalloc() with kvmalloc(), hence it can't be the cause. The further debug action revealed that this happens in the case where a slave PCM doesn't support only the stereo channels while the OSS stream is set up for a mono channel. Below is a brief explanation: At each OSS parameter change, the driver sets up the PCM hw_params again in snd_pcm_oss_change_params_lock(). This is also the place where plugins are created and local buffers are allocated. The problem is that the plugins are created before the final hw_params is determined. Namely, two snd_pcm_hw_param_near() calls for setting the period size and periods may influence on the final result of channels, rates, etc, too, while the current code has already created plugins beforehand with the premature values. So, the plugin believes that channels=1, while the actual I/O is with channels=2, which makes the driver reading/writing over the allocated buffer size. The fix is simply to move the plugin allocation code after the final hw_params call. Reported-by: syzbot+d4503ae45b65c5bc1194@syzkaller.appspotmail.com Cc: Signed-off-by: Takashi Iwai --- sound/core/oss/pcm_oss.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index d5b0d7ba83c4..f6ae68017608 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -940,6 +940,28 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream) oss_frame_size = snd_pcm_format_physical_width(params_format(params)) * params_channels(params) / 8; + err = snd_pcm_oss_period_size(substream, params, sparams); + if (err < 0) + goto failure; + + n = snd_pcm_plug_slave_size(substream, runtime->oss.period_bytes / oss_frame_size); + err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, n, NULL); + if (err < 0) + goto failure; + + err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIODS, + runtime->oss.periods, NULL); + if (err < 0) + goto failure; + + snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DROP, NULL); + + err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_HW_PARAMS, sparams); + if (err < 0) { + pcm_dbg(substream->pcm, "HW_PARAMS failed: %i\n", err); + goto failure; + } + #ifdef CONFIG_SND_PCM_OSS_PLUGINS snd_pcm_oss_plugin_clear(substream); if (!direct) { @@ -974,27 +996,6 @@ static int snd_pcm_oss_change_params_locked(struct snd_pcm_substream *substream) } #endif - err = snd_pcm_oss_period_size(substream, params, sparams); - if (err < 0) - goto failure; - - n = snd_pcm_plug_slave_size(substream, runtime->oss.period_bytes / oss_frame_size); - err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIOD_SIZE, n, NULL); - if (err < 0) - goto failure; - - err = snd_pcm_hw_param_near(substream, sparams, SNDRV_PCM_HW_PARAM_PERIODS, - runtime->oss.periods, NULL); - if (err < 0) - goto failure; - - snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_DROP, NULL); - - if ((err = snd_pcm_kernel_ioctl(substream, SNDRV_PCM_IOCTL_HW_PARAMS, sparams)) < 0) { - pcm_dbg(substream->pcm, "HW_PARAMS failed: %i\n", err); - goto failure; - } - if (runtime->oss.trigger) { sw_params->start_threshold = 1; } else { -- cgit From 7ecced0934e574b528a1ba6c237731e682216a74 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Fri, 8 Mar 2019 22:07:57 -0600 Subject: gpio: exar: add a check for the return value of ida_simple_get fails ida_simple_get may fail and return a negative error number. The fix checks its return value; if it fails, go to err_destroy. Cc: Signed-off-by: Kangjie Lu Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-exar.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpio/gpio-exar.c b/drivers/gpio/gpio-exar.c index 0ecd2369c2ca..a09d2f9ebacc 100644 --- a/drivers/gpio/gpio-exar.c +++ b/drivers/gpio/gpio-exar.c @@ -148,6 +148,8 @@ static int gpio_exar_probe(struct platform_device *pdev) mutex_init(&exar_gpio->lock); index = ida_simple_get(&ida_index, 0, 0, GFP_KERNEL); + if (index < 0) + goto err_destroy; sprintf(exar_gpio->name, "exar_gpio%d", index); exar_gpio->gpio_chip.label = exar_gpio->name; -- cgit From c5bc6e526d3f217ed2cc3681d256dc4a2af4cc2b Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 11 Mar 2019 21:29:37 +0800 Subject: gpio: adnp: Fix testing wrong value in adnp_gpio_direction_input Current code test wrong value so it does not verify if the written data is correctly read back. Fix it. Also make it return -EPERM if read value does not match written bit, just like it done for adnp_gpio_direction_output(). Fixes: 5e969a401a01 ("gpio: Add Avionic Design N-bit GPIO expander support") Cc: Signed-off-by: Axel Lin Reviewed-by: Thierry Reding Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-adnp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpio-adnp.c b/drivers/gpio/gpio-adnp.c index 91b90c0cea73..12acdac85820 100644 --- a/drivers/gpio/gpio-adnp.c +++ b/drivers/gpio/gpio-adnp.c @@ -132,8 +132,10 @@ static int adnp_gpio_direction_input(struct gpio_chip *chip, unsigned offset) if (err < 0) goto out; - if (err & BIT(pos)) - err = -EACCES; + if (value & BIT(pos)) { + err = -EPERM; + goto out; + } err = 0; -- cgit From b45a02e13ee74b6fde56df4d76786058821a3aba Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 19 Mar 2019 15:54:16 +0100 Subject: gpio: amd-fch: Fix bogus SPDX identifier spdxcheck.py complains: include/linux/platform_data/gpio/gpio-amd-fch.h: 1:28 Invalid License ID: GPL+ which is correct because GPL+ is not a valid identifier. Of course this could have been caught by checkpatch.pl _before_ submitting or merging the patch. WARNING: 'SPDX-License-Identifier: GPL+ */' is not supported in LICENSES/... #271: FILE: include/linux/platform_data/gpio/gpio-amd-fch.h:1: +/* SPDX-License-Identifier: GPL+ */ Fix it under the assumption that the author meant GPL-2.0+, which makes sense as the corresponding C file is using that identifier. Fixes: e09d168f13f0 ("gpio: AMD G-Series PCH gpio driver") Signed-off-by: Thomas Gleixner Signed-off-by: Bartosz Golaszewski --- include/linux/platform_data/gpio/gpio-amd-fch.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/platform_data/gpio/gpio-amd-fch.h b/include/linux/platform_data/gpio/gpio-amd-fch.h index a867637e172d..9e46678edb2a 100644 --- a/include/linux/platform_data/gpio/gpio-amd-fch.h +++ b/include/linux/platform_data/gpio/gpio-amd-fch.h @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL+ */ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * AMD FCH gpio driver platform-data -- cgit From e898e69d6b9475bf123f99b3c5d1a67bb7cb2361 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 7 Mar 2019 14:27:56 -0700 Subject: x86/hw_breakpoints: Make default case in hw_breakpoint_arch_parse() return an error When building with -Wsometimes-uninitialized, Clang warns: arch/x86/kernel/hw_breakpoint.c:355:2: warning: variable 'align' is used uninitialized whenever switch default is taken [-Wsometimes-uninitialized] The default cannot be reached because arch_build_bp_info() initializes hw->len to one of the specified cases. Nevertheless the warning is valid and returning -EINVAL makes sure that this cannot be broken by future modifications. Suggested-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Thomas Gleixner Reviewed-by: Nick Desaulniers Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: clang-built-linux@googlegroups.com Link: https://github.com/ClangBuiltLinux/linux/issues/392 Link: https://lkml.kernel.org/r/20190307212756.4648-1-natechancellor@gmail.com --- arch/x86/kernel/hw_breakpoint.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index ff9bfd40429e..d73083021002 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -354,6 +354,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, #endif default: WARN_ON_ONCE(1); + return -EINVAL; } /* -- cgit From 6cbcf596934c8e16d6288c7cc62dfb7ad8eadf15 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 22 Mar 2019 17:50:15 +0200 Subject: xhci: Fix port resume done detection for SS ports with LPM enabled A suspended SS port in U3 link state will go to U0 when resumed, but can almost immediately after that enter U1 or U2 link power save states before host controller driver reads the port status. Host controller driver only checks for U0 state, and might miss the finished resume, leaving flags unclear and skip notifying usb code of the wake. Add U1 and U2 to the possible link states when checking for finished port resume. Cc: stable Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 40fa25c4d041..9215a28dad40 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -1647,10 +1647,13 @@ static void handle_port_status(struct xhci_hcd *xhci, } } - if ((portsc & PORT_PLC) && (portsc & PORT_PLS_MASK) == XDEV_U0 && - DEV_SUPERSPEED_ANY(portsc)) { + if ((portsc & PORT_PLC) && + DEV_SUPERSPEED_ANY(portsc) && + ((portsc & PORT_PLS_MASK) == XDEV_U0 || + (portsc & PORT_PLS_MASK) == XDEV_U1 || + (portsc & PORT_PLS_MASK) == XDEV_U2)) { xhci_dbg(xhci, "resume SS port %d finished\n", port_id); - /* We've just brought the device into U0 through either the + /* We've just brought the device into U0/1/2 through either the * Resume state after a device remote wakeup, or through the * U3Exit state after a host-initiated resume. If it's a device * initiated remote wake, don't pass up the link state change, -- cgit From 8867ea262196a6945c24a0fb739575af646ec0e9 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 22 Mar 2019 17:50:16 +0200 Subject: usb: xhci: dbc: Don't free all memory with spinlock held The xhci debug capability (DbC) feature did its memory cleanup with spinlock held. dma_free_coherent() warns if called with interrupts disabled move the memory cleanup outside the spinlock Cc: stable Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-dbgcap.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/usb/host/xhci-dbgcap.c b/drivers/usb/host/xhci-dbgcap.c index c78be578abb0..d932cc31711e 100644 --- a/drivers/usb/host/xhci-dbgcap.c +++ b/drivers/usb/host/xhci-dbgcap.c @@ -516,7 +516,6 @@ static int xhci_do_dbc_stop(struct xhci_hcd *xhci) return -1; writel(0, &dbc->regs->control); - xhci_dbc_mem_cleanup(xhci); dbc->state = DS_DISABLED; return 0; @@ -562,8 +561,10 @@ static void xhci_dbc_stop(struct xhci_hcd *xhci) ret = xhci_do_dbc_stop(xhci); spin_unlock_irqrestore(&dbc->lock, flags); - if (!ret) + if (!ret) { + xhci_dbc_mem_cleanup(xhci); pm_runtime_put_sync(xhci_to_hcd(xhci)->self.controller); + } } static void -- cgit From d92f2c59cc2cbca6bfb2cc54882b58ba76b15fd4 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Fri, 22 Mar 2019 17:50:17 +0200 Subject: xhci: Don't let USB3 ports stuck in polling state prevent suspend Commit 2f31a67f01a8 ("usb: xhci: Prevent bus suspend if a port connect change or polling state is detected") was intended to prevent ports that were still link training from being forced to U3 suspend state mid enumeration. This solved enumeration issues for devices with slow link training. Turns out some devices are stuck in the link training/polling state, and thus that patch will prevent suspend completely for these devices. This is seen with USB3 card readers in some MacBooks. Instead of preventing suspend, give some time to complete the link training. On successful training the port will end up as connected and enabled. If port instead is stuck in link training the bus suspend will continue suspending after 360ms (10 * 36ms) timeout (tPollingLFPSTimeout). Original patch was sent to stable, this one should go there as well Fixes: 2f31a67f01a8 ("usb: xhci: Prevent bus suspend if a port connect change or polling state is detected") Cc: stable@vger.kernel.org Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-hub.c | 19 ++++++++++++------- drivers/usb/host/xhci.h | 8 ++++++++ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index e2eece693655..96a740543183 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -1545,20 +1545,25 @@ int xhci_bus_suspend(struct usb_hcd *hcd) port_index = max_ports; while (port_index--) { u32 t1, t2; - + int retries = 10; +retry: t1 = readl(ports[port_index]->addr); t2 = xhci_port_state_to_neutral(t1); portsc_buf[port_index] = 0; - /* Bail out if a USB3 port has a new device in link training */ - if ((hcd->speed >= HCD_USB3) && + /* + * Give a USB3 port in link training time to finish, but don't + * prevent suspend as port might be stuck + */ + if ((hcd->speed >= HCD_USB3) && retries-- && (t1 & PORT_PLS_MASK) == XDEV_POLLING) { - bus_state->bus_suspended = 0; spin_unlock_irqrestore(&xhci->lock, flags); - xhci_dbg(xhci, "Bus suspend bailout, port in polling\n"); - return -EBUSY; + msleep(XHCI_PORT_POLLING_LFPS_TIME); + spin_lock_irqsave(&xhci->lock, flags); + xhci_dbg(xhci, "port %d polling in bus suspend, waiting\n", + port_index); + goto retry; } - /* suspend ports in U0, or bail out for new connect changes */ if ((t1 & PORT_PE) && (t1 & PORT_PLS_MASK) == XDEV_U0) { if ((t1 & PORT_CSC) && wake_enabled) { diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h index 652dc36e3012..9334cdee382a 100644 --- a/drivers/usb/host/xhci.h +++ b/drivers/usb/host/xhci.h @@ -452,6 +452,14 @@ struct xhci_op_regs { */ #define XHCI_DEFAULT_BESL 4 +/* + * USB3 specification define a 360ms tPollingLFPSTiemout for USB3 ports + * to complete link training. usually link trainig completes much faster + * so check status 10 times with 36ms sleep in places we need to wait for + * polling to complete. + */ +#define XHCI_PORT_POLLING_LFPS_TIME 36 + /** * struct xhci_intr_reg - Interrupt Register Set * @irq_pending: IMAN - Interrupt Management Register. Used to enable -- cgit From 1e4471e74c75acb3f89959ffa02a241227937ae2 Mon Sep 17 00:00:00 2001 From: Shenghui Wang Date: Sat, 16 Mar 2019 16:24:37 +0800 Subject: sbitmap: trivial - update comment for sbitmap_deferred_clear_bit "sbitmap_batch_clear" should be "sbitmap_deferred_clear" Acked-by: Omar Sandoval Signed-off-by: Shenghui Wang Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 14d558146aea..20f3e3f029b9 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -330,7 +330,7 @@ static inline void sbitmap_clear_bit(struct sbitmap *sb, unsigned int bitnr) /* * This one is special, since it doesn't actually clear the bit, rather it * sets the corresponding bit in the ->cleared mask instead. Paired with - * the caller doing sbitmap_batch_clear() if a given index is full, which + * the caller doing sbitmap_deferred_clear() if a given index is full, which * will clear the previously freed entries in the corresponding ->word. */ static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned int bitnr) -- cgit From 4fc90fb883fcb72d6bfbf84d554a3e820a05ef62 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Fri, 22 Mar 2019 15:51:36 +0100 Subject: ALSA: hda/ca0132 - Simplify alt firmware loading code ca0132 codec driver loads the firmware selectively depending on the model in addition to the fallback of the default firmware. The code works good, but a minor problem is that the current code seems confusing for Clang where it spews a warning about uninitialized variable. This patch simplifies the code flow for such a false-positive warning. After this refactoring, the ca0132_spec.alt_firmware_present field is no longer used, hence it's eliminated as well. Reported-and-tested-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_ca0132.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/sound/pci/hda/patch_ca0132.c b/sound/pci/hda/patch_ca0132.c index 29882bda7632..e1ebc6d5f382 100644 --- a/sound/pci/hda/patch_ca0132.c +++ b/sound/pci/hda/patch_ca0132.c @@ -1005,7 +1005,6 @@ struct ca0132_spec { unsigned int scp_resp_header; unsigned int scp_resp_data[4]; unsigned int scp_resp_count; - bool alt_firmware_present; bool startup_check_entered; bool dsp_reload; @@ -7518,7 +7517,7 @@ static bool ca0132_download_dsp_images(struct hda_codec *codec) bool dsp_loaded = false; struct ca0132_spec *spec = codec->spec; const struct dsp_image_seg *dsp_os_image; - const struct firmware *fw_entry; + const struct firmware *fw_entry = NULL; /* * Alternate firmwares for different variants. The Recon3Di apparently * can use the default firmware, but I'll leave the option in case @@ -7529,33 +7528,26 @@ static bool ca0132_download_dsp_images(struct hda_codec *codec) case QUIRK_R3D: case QUIRK_AE5: if (request_firmware(&fw_entry, DESKTOP_EFX_FILE, - codec->card->dev) != 0) { + codec->card->dev) != 0) codec_dbg(codec, "Desktop firmware not found."); - spec->alt_firmware_present = false; - } else { + else codec_dbg(codec, "Desktop firmware selected."); - spec->alt_firmware_present = true; - } break; case QUIRK_R3DI: if (request_firmware(&fw_entry, R3DI_EFX_FILE, - codec->card->dev) != 0) { + codec->card->dev) != 0) codec_dbg(codec, "Recon3Di alt firmware not detected."); - spec->alt_firmware_present = false; - } else { + else codec_dbg(codec, "Recon3Di firmware selected."); - spec->alt_firmware_present = true; - } break; default: - spec->alt_firmware_present = false; break; } /* * Use default ctefx.bin if no alt firmware is detected, or if none * exists for your particular codec. */ - if (!spec->alt_firmware_present) { + if (!fw_entry) { codec_dbg(codec, "Default firmware selected."); if (request_firmware(&fw_entry, EFX_FILE, codec->card->dev) != 0) -- cgit From d18a7408d7be0f34a120d99051ed5187d9727728 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 22 Mar 2019 22:37:08 +0800 Subject: clocksource/drivers/clps711x: Make clps711x_clksrc_init() static Fix sparse warning: drivers/clocksource/clps711x-timer.c:96:13: warning: symbol 'clps711x_clksrc_init' was not declared. Should it be static? Signed-off-by: YueHaibing Signed-off-by: Thomas Gleixner Cc: Cc: Cc: Link: https://lkml.kernel.org/r/20190322143708.12716-1-yuehaibing@huawei.com --- drivers/clocksource/clps711x-timer.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/clps711x-timer.c b/drivers/clocksource/clps711x-timer.c index a8dd80576c95..cdc251524f5e 100644 --- a/drivers/clocksource/clps711x-timer.c +++ b/drivers/clocksource/clps711x-timer.c @@ -93,8 +93,9 @@ static int __init _clps711x_clkevt_init(struct clk *clock, void __iomem *base, "clps711x-timer", clkevt); } -void __init clps711x_clksrc_init(void __iomem *tc1_base, void __iomem *tc2_base, - unsigned int irq) +static void __init clps711x_clksrc_init(void __iomem *tc1_base, + void __iomem *tc2_base, + unsigned int irq) { struct clk *tc1 = clk_get_sys("clps711x-timer.0", NULL); struct clk *tc2 = clk_get_sys("clps711x-timer.1", NULL); -- cgit From bddee90af621914f08a03d546419fc293e9140d8 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 22 Mar 2019 22:39:40 +0800 Subject: clocksource/drivers/tcb_clksrc: Make tc_clksrc_suspend/resume() static Fix sparse warnings: drivers/clocksource/tcb_clksrc.c:74:6: warning: symbol 'tc_clksrc_suspend' was not declared. Should it be static? drivers/clocksource/tcb_clksrc.c:89:6: warning: symbol 'tc_clksrc_resume' was not declared. Should it be static? Signed-off-by: YueHaibing Signed-off-by: Thomas Gleixner Cc: Cc: Cc: Link: https://lkml.kernel.org/r/20190322143940.12396-1-yuehaibing@huawei.com --- drivers/clocksource/tcb_clksrc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c index 43f4d5c4d6fa..f987027ca566 100644 --- a/drivers/clocksource/tcb_clksrc.c +++ b/drivers/clocksource/tcb_clksrc.c @@ -71,7 +71,7 @@ static u64 tc_get_cycles32(struct clocksource *cs) return readl_relaxed(tcaddr + ATMEL_TC_REG(0, CV)); } -void tc_clksrc_suspend(struct clocksource *cs) +static void tc_clksrc_suspend(struct clocksource *cs) { int i; @@ -86,7 +86,7 @@ void tc_clksrc_suspend(struct clocksource *cs) bmr_cache = readl(tcaddr + ATMEL_TC_BMR); } -void tc_clksrc_resume(struct clocksource *cs) +static void tc_clksrc_resume(struct clocksource *cs) { int i; -- cgit From 008258d995a637c77c10a5d087d134eed49a6572 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 22 Mar 2019 22:43:02 +0800 Subject: clocksource/drivers/timer-ti-dm: Make omap_dm_timer_set_load_start() static Fix sparse warning: drivers/clocksource/timer-ti-dm.c:589:5: warning: symbol 'omap_dm_timer_set_load_start' was not declared. Should it be static? Signed-off-by: YueHaibing Signed-off-by: Thomas Gleixner Cc: Link: https://lkml.kernel.org/r/20190322144302.6704-1-yuehaibing@huawei.com --- drivers/clocksource/timer-ti-dm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clocksource/timer-ti-dm.c b/drivers/clocksource/timer-ti-dm.c index c364027638e1..3352da6ed61f 100644 --- a/drivers/clocksource/timer-ti-dm.c +++ b/drivers/clocksource/timer-ti-dm.c @@ -586,8 +586,8 @@ static int omap_dm_timer_set_load(struct omap_dm_timer *timer, int autoreload, } /* Optimized set_load which removes costly spin wait in timer_start */ -int omap_dm_timer_set_load_start(struct omap_dm_timer *timer, int autoreload, - unsigned int load) +static int omap_dm_timer_set_load_start(struct omap_dm_timer *timer, + int autoreload, unsigned int load) { u32 l; -- cgit From 9039de4034775f4420bf01fa879f8c04b3cd6bba Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 22 Mar 2019 22:43:59 +0800 Subject: clocksource/drivers/mips-gic-timer: Make gic_compare_irqaction static Fix sparse warning: drivers/clocksource/mips-gic-timer.c:70:18: warning: symbol 'gic_compare_irqaction' was not declared. Should it be static? Signed-off-by: YueHaibing Signed-off-by: Thomas Gleixner Cc: Link: https://lkml.kernel.org/r/20190322144359.19516-1-yuehaibing@huawei.com --- drivers/clocksource/mips-gic-timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clocksource/mips-gic-timer.c b/drivers/clocksource/mips-gic-timer.c index 54f8a331b53a..37671a5d4ed9 100644 --- a/drivers/clocksource/mips-gic-timer.c +++ b/drivers/clocksource/mips-gic-timer.c @@ -67,7 +67,7 @@ static irqreturn_t gic_compare_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -struct irqaction gic_compare_irqaction = { +static struct irqaction gic_compare_irqaction = { .handler = gic_compare_interrupt, .percpu_dev_id = &gic_clockevent_device, .flags = IRQF_PERCPU | IRQF_TIMER, -- cgit From d53e292f0f505783d0219f58f8f8f294f45f4ee6 Mon Sep 17 00:00:00 2001 From: Long Li Date: Fri, 15 Mar 2019 07:54:59 +0000 Subject: CIFS: Fix an issue with re-sending wdata when transport returning -EAGAIN When sending a wdata, transport may return -EAGAIN. In this case we should re-obtain credits because the session may have been reconnected. Change in v2: adjust_credits before re-sending Signed-off-by: Long Li Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky --- fs/cifs/file.c | 77 ++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 32 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 2a6d20c0ce02..2aa3e62af764 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2632,43 +2632,56 @@ cifs_resend_wdata(struct cifs_writedata *wdata, struct list_head *wdata_list, struct TCP_Server_Info *server = tlink_tcon(wdata->cfile->tlink)->ses->server; - /* - * Wait for credits to resend this wdata. - * Note: we are attempting to resend the whole wdata not in segments - */ do { - rc = server->ops->wait_mtu_credits(server, wdata->bytes, &wsize, - &credits); + if (wdata->cfile->invalidHandle) { + rc = cifs_reopen_file(wdata->cfile, false); + if (rc == -EAGAIN) + continue; + else if (rc) + break; + } - if (rc) - goto out; - if (wsize < wdata->bytes) { - add_credits_and_wake_if(server, &credits, 0); - msleep(1000); - } - } while (wsize < wdata->bytes); + /* + * Wait for credits to resend this wdata. + * Note: we are attempting to resend the whole wdata not in + * segments + */ + do { + rc = server->ops->wait_mtu_credits(server, wdata->bytes, + &wsize, &credits); + if (rc) + goto fail; + + if (wsize < wdata->bytes) { + add_credits_and_wake_if(server, &credits, 0); + msleep(1000); + } + } while (wsize < wdata->bytes); + wdata->credits = credits; - wdata->credits = credits; - rc = -EAGAIN; - while (rc == -EAGAIN) { - rc = 0; - if (wdata->cfile->invalidHandle) - rc = cifs_reopen_file(wdata->cfile, false); - if (!rc) - rc = server->ops->async_writev(wdata, + rc = adjust_credits(server, &wdata->credits, wdata->bytes); + + if (!rc) { + if (wdata->cfile->invalidHandle) + rc = -EAGAIN; + else + rc = server->ops->async_writev(wdata, cifs_uncached_writedata_release); - } + } - if (!rc) { - list_add_tail(&wdata->list, wdata_list); - return 0; - } + /* If the write was successfully sent, we are done */ + if (!rc) { + list_add_tail(&wdata->list, wdata_list); + return 0; + } - add_credits_and_wake_if(server, &wdata->credits, 0); -out: - kref_put(&wdata->refcount, cifs_uncached_writedata_release); + /* Roll back credits and retry if needed */ + add_credits_and_wake_if(server, &wdata->credits, 0); + } while (rc == -EAGAIN); +fail: + kref_put(&wdata->refcount, cifs_uncached_writedata_release); return rc; } @@ -2896,12 +2909,12 @@ restart_loop: wdata->bytes, &tmp_from, ctx->cfile, cifs_sb, &tmp_list, ctx); + + kref_put(&wdata->refcount, + cifs_uncached_writedata_release); } list_splice(&tmp_list, &ctx->list); - - kref_put(&wdata->refcount, - cifs_uncached_writedata_release); goto restart_loop; } } -- cgit From 0b0dfd59216755cfa5a47eab2811efaa4589db68 Mon Sep 17 00:00:00 2001 From: Long Li Date: Fri, 15 Mar 2019 07:55:00 +0000 Subject: CIFS: Fix an issue with re-sending rdata when transport returning -EAGAIN When sending a rdata, transport may return -EAGAIN. In this case we should re-obtain credits because the session may have been reconnected. Change in v2: adjust_credits before re-sending Signed-off-by: Long Li Signed-off-by: Steve French Reviewed-by: Pavel Shilovsky --- fs/cifs/file.c | 71 +++++++++++++++++++++++++++++++++------------------------- 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 2aa3e62af764..89006e044973 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -3361,44 +3361,55 @@ static int cifs_resend_rdata(struct cifs_readdata *rdata, struct TCP_Server_Info *server = tlink_tcon(rdata->cfile->tlink)->ses->server; - /* - * Wait for credits to resend this rdata. - * Note: we are attempting to resend the whole rdata not in segments - */ do { - rc = server->ops->wait_mtu_credits(server, rdata->bytes, + if (rdata->cfile->invalidHandle) { + rc = cifs_reopen_file(rdata->cfile, true); + if (rc == -EAGAIN) + continue; + else if (rc) + break; + } + + /* + * Wait for credits to resend this rdata. + * Note: we are attempting to resend the whole rdata not in + * segments + */ + do { + rc = server->ops->wait_mtu_credits(server, rdata->bytes, &rsize, &credits); - if (rc) - goto out; + if (rc) + goto fail; - if (rsize < rdata->bytes) { - add_credits_and_wake_if(server, &credits, 0); - msleep(1000); - } - } while (rsize < rdata->bytes); + if (rsize < rdata->bytes) { + add_credits_and_wake_if(server, &credits, 0); + msleep(1000); + } + } while (rsize < rdata->bytes); + rdata->credits = credits; - rdata->credits = credits; - rc = -EAGAIN; - while (rc == -EAGAIN) { - rc = 0; - if (rdata->cfile->invalidHandle) - rc = cifs_reopen_file(rdata->cfile, true); - if (!rc) - rc = server->ops->async_readv(rdata); - } + rc = adjust_credits(server, &rdata->credits, rdata->bytes); + if (!rc) { + if (rdata->cfile->invalidHandle) + rc = -EAGAIN; + else + rc = server->ops->async_readv(rdata); + } - if (!rc) { - /* Add to aio pending list */ - list_add_tail(&rdata->list, rdata_list); - return 0; - } + /* If the read was successfully sent, we are done */ + if (!rc) { + /* Add to aio pending list */ + list_add_tail(&rdata->list, rdata_list); + return 0; + } - add_credits_and_wake_if(server, &rdata->credits, 0); -out: - kref_put(&rdata->refcount, - cifs_uncached_readdata_release); + /* Roll back credits and retry if needed */ + add_credits_and_wake_if(server, &rdata->credits, 0); + } while (rc == -EAGAIN); +fail: + kref_put(&rdata->refcount, cifs_uncached_readdata_release); return rc; } -- cgit From b073a08016a10f01dfb0d0b6c7fa89da0d544963 Mon Sep 17 00:00:00 2001 From: Xiaoli Feng Date: Sat, 16 Mar 2019 12:11:54 +0800 Subject: cifs: fix that return -EINVAL when do dedupe operation dedupe_file_range operations is combiled into remap_file_range. But it's always skipped for dedupe operations in function cifs_remap_file_range. Example to test: Before this patch: # dd if=/dev/zero of=cifs/file bs=1M count=1 # xfs_io -c "dedupe cifs/file 4k 64k 4k" cifs/file XFS_IOC_FILE_EXTENT_SAME: Invalid argument After this patch: # dd if=/dev/zero of=cifs/file bs=1M count=1 # xfs_io -c "dedupe cifs/file 4k 64k 4k" cifs/file XFS_IOC_FILE_EXTENT_SAME: Operation not supported Influence for xfstests: generic/091 generic/112 generic/127 generic/263 These tests report this error "do_copy_range:: Invalid argument" instead of "FIDEDUPERANGE: Invalid argument". Because there are still two bugs cause these test failed. https://bugzilla.kernel.org/show_bug.cgi?id=202935 https://bugzilla.kernel.org/show_bug.cgi?id=202785 Signed-off-by: Xiaoli Feng Signed-off-by: Steve French --- fs/cifs/cifsfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 217276b8b942..f9b71c12cc9f 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1008,7 +1008,7 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, unsigned int xid; int rc; - if (remap_flags & ~REMAP_FILE_ADVISORY) + if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) return -EINVAL; cifs_dbg(FYI, "clone range\n"); -- cgit From 85f9987b236cf46e06ffdb5c225cf1f3c0acb789 Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 17 Mar 2019 15:58:38 -0500 Subject: fix incorrect error code mapping for OBJECTID_NOT_FOUND It was mapped to EIO which can be confusing when user space queries for an object GUID for an object for which the server file system doesn't support (or hasn't saved one). As Amir Goldstein suggested this is similar to ENOATTR (equivalently ENODATA in Linux errno definitions) so changing NT STATUS code mapping for OBJECTID_NOT_FOUND to ENODATA. Signed-off-by: Steve French CC: Amir Goldstein --- fs/cifs/smb2maperror.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index 924269cec135..e32c264e3adb 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -1036,7 +1036,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_UNFINISHED_CONTEXT_DELETED, -EIO, "STATUS_UNFINISHED_CONTEXT_DELETED"}, {STATUS_NO_TGT_REPLY, -EIO, "STATUS_NO_TGT_REPLY"}, - {STATUS_OBJECTID_NOT_FOUND, -EIO, "STATUS_OBJECTID_NOT_FOUND"}, + /* Note that ENOATTTR and ENODATA are the same errno */ + {STATUS_OBJECTID_NOT_FOUND, -ENODATA, "STATUS_OBJECTID_NOT_FOUND"}, {STATUS_NO_IP_ADDRESSES, -EIO, "STATUS_NO_IP_ADDRESSES"}, {STATUS_WRONG_CREDENTIAL_HANDLE, -EIO, "STATUS_WRONG_CREDENTIAL_HANDLE"}, -- cgit From e71ab2aa06f731a944993120b0eef1556c63b81c Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Thu, 21 Mar 2019 14:59:02 +1000 Subject: cifs: allow guest mounts to work for smb3.11 Fix Guest/Anonymous sessions so that they work with SMB 3.11. The commit noted below tightened the conditions and forced signing for the SMB2-TreeConnect commands as per MS-SMB2. However, this should only apply to normal user sessions and not for Guest/Anonumous sessions. Fixes: 6188f28bf608 ("Tree connect for SMB3.1.1 must be signed for non-encrypted shares") Signed-off-by: Ronnie Sahlberg CC: Stable Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index c399e09b76e6..8e4a1da95418 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1628,9 +1628,13 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, iov[1].iov_base = unc_path; iov[1].iov_len = unc_path_len; - /* 3.11 tcon req must be signed if not encrypted. See MS-SMB2 3.2.4.1.1 */ + /* + * 3.11 tcon req must be signed if not encrypted. See MS-SMB2 3.2.4.1.1 + * unless it is guest or anonymous user. See MS-SMB2 3.2.5.3.1 + */ if ((ses->server->dialect == SMB311_PROT_ID) && - !smb3_encryption_required(tcon)) + !smb3_encryption_required(tcon) && + !(ses->session_flags & (SMB2_SESSION_FLAG_IS_GUEST|SMB2_SESSION_FLAG_IS_NULL))) req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED; memset(&rqst, 0, sizeof(struct smb_rqst)); -- cgit From 68ddb496800acdb46172b4981dc3753ea9b39c25 Mon Sep 17 00:00:00 2001 From: "Paulo Alcantara (SUSE)" Date: Thu, 21 Mar 2019 19:31:22 -0300 Subject: cifs: Fix slab-out-of-bounds when tracing SMB tcon This patch fixes the following KASAN report: [ 779.044746] BUG: KASAN: slab-out-of-bounds in string+0xab/0x180 [ 779.044750] Read of size 1 at addr ffff88814f327968 by task trace-cmd/2812 [ 779.044756] CPU: 1 PID: 2812 Comm: trace-cmd Not tainted 5.1.0-rc1+ #62 [ 779.044760] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.12.0-0-ga698c89-prebuilt.qemu.org 04/01/2014 [ 779.044761] Call Trace: [ 779.044769] dump_stack+0x5b/0x90 [ 779.044775] ? string+0xab/0x180 [ 779.044781] print_address_description+0x6c/0x23c [ 779.044787] ? string+0xab/0x180 [ 779.044792] ? string+0xab/0x180 [ 779.044797] kasan_report.cold.3+0x1a/0x32 [ 779.044803] ? string+0xab/0x180 [ 779.044809] string+0xab/0x180 [ 779.044816] ? widen_string+0x160/0x160 [ 779.044822] ? vsnprintf+0x5bf/0x7f0 [ 779.044829] vsnprintf+0x4e7/0x7f0 [ 779.044836] ? pointer+0x4a0/0x4a0 [ 779.044841] ? seq_buf_vprintf+0x79/0xc0 [ 779.044848] seq_buf_vprintf+0x62/0xc0 [ 779.044855] trace_seq_printf+0x113/0x210 [ 779.044861] ? trace_seq_puts+0x110/0x110 [ 779.044867] ? trace_raw_output_prep+0xd8/0x110 [ 779.044876] trace_raw_output_smb3_tcon_class+0x9f/0xc0 [ 779.044882] print_trace_line+0x377/0x890 [ 779.044888] ? tracing_buffers_read+0x300/0x300 [ 779.044893] ? ring_buffer_read+0x58/0x70 [ 779.044899] s_show+0x6e/0x140 [ 779.044906] seq_read+0x505/0x6a0 [ 779.044913] vfs_read+0xaf/0x1b0 [ 779.044919] ksys_read+0xa1/0x130 [ 779.044925] ? kernel_write+0xa0/0xa0 [ 779.044931] ? __do_page_fault+0x3d5/0x620 [ 779.044938] do_syscall_64+0x63/0x150 [ 779.044944] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 779.044949] RIP: 0033:0x7f62c2c2db31 [ 779.044955] Code: fe ff ff 48 8d 3d 17 9e 09 00 48 83 ec 08 e8 96 02 02 00 66 0f 1f 44 00 00 8b 05 fa fc 2c 00 48 63 ff 85 c0 75 13 31 c0 0f 05 <48> 3d 00 f0 ff ff 77 57 f3 c3 0f 1f 44 00 00 55 53 48 89 d5 48 89 [ 779.044958] RSP: 002b:00007ffd6e116678 EFLAGS: 00000246 ORIG_RAX: 0000000000000000 [ 779.044964] RAX: ffffffffffffffda RBX: 0000560a38be9260 RCX: 00007f62c2c2db31 [ 779.044966] RDX: 0000000000002000 RSI: 00007ffd6e116710 RDI: 0000000000000003 [ 779.044966] RDX: 0000000000002000 RSI: 00007ffd6e116710 RDI: 0000000000000003 [ 779.044969] RBP: 00007f62c2ef5420 R08: 0000000000000000 R09: 0000000000000003 [ 779.044972] R10: ffffffffffffffa8 R11: 0000000000000246 R12: 00007ffd6e116710 [ 779.044975] R13: 0000000000002000 R14: 0000000000000d68 R15: 0000000000002000 [ 779.044981] Allocated by task 1257: [ 779.044987] __kasan_kmalloc.constprop.5+0xc1/0xd0 [ 779.044992] kmem_cache_alloc+0xad/0x1a0 [ 779.044997] getname_flags+0x6c/0x2a0 [ 779.045003] user_path_at_empty+0x1d/0x40 [ 779.045008] do_faccessat+0x12a/0x330 [ 779.045012] do_syscall_64+0x63/0x150 [ 779.045017] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 779.045019] Freed by task 1257: [ 779.045023] __kasan_slab_free+0x12e/0x180 [ 779.045029] kmem_cache_free+0x85/0x1b0 [ 779.045034] filename_lookup.part.70+0x176/0x250 [ 779.045039] do_faccessat+0x12a/0x330 [ 779.045043] do_syscall_64+0x63/0x150 [ 779.045048] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 779.045052] The buggy address belongs to the object at ffff88814f326600 which belongs to the cache names_cache of size 4096 [ 779.045057] The buggy address is located 872 bytes to the right of 4096-byte region [ffff88814f326600, ffff88814f327600) [ 779.045058] The buggy address belongs to the page: [ 779.045062] page:ffffea00053cc800 count:1 mapcount:0 mapping:ffff88815b191b40 index:0x0 compound_mapcount: 0 [ 779.045067] flags: 0x200000000010200(slab|head) [ 779.045075] raw: 0200000000010200 dead000000000100 dead000000000200 ffff88815b191b40 [ 779.045081] raw: 0000000000000000 0000000000070007 00000001ffffffff 0000000000000000 [ 779.045083] page dumped because: kasan: bad access detected [ 779.045085] Memory state around the buggy address: [ 779.045089] ffff88814f327800: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 779.045093] ffff88814f327880: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 779.045097] >ffff88814f327900: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 779.045099] ^ [ 779.045103] ffff88814f327980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 779.045107] ffff88814f327a00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc [ 779.045109] ================================================================== [ 779.045110] Disabling lock debugging due to kernel taint Correctly assign tree name str for smb3_tcon event. Signed-off-by: Paulo Alcantara (SUSE) Signed-off-by: Steve French --- fs/cifs/trace.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h index fa226de48ef3..99c4d799c24b 100644 --- a/fs/cifs/trace.h +++ b/fs/cifs/trace.h @@ -549,19 +549,19 @@ DECLARE_EVENT_CLASS(smb3_tcon_class, __field(unsigned int, xid) __field(__u32, tid) __field(__u64, sesid) - __field(const char *, unc_name) + __string(name, unc_name) __field(int, rc) ), TP_fast_assign( __entry->xid = xid; __entry->tid = tid; __entry->sesid = sesid; - __entry->unc_name = unc_name; + __assign_str(name, unc_name); __entry->rc = rc; ), TP_printk("xid=%u sid=0x%llx tid=0x%x unc_name=%s rc=%d", __entry->xid, __entry->sesid, __entry->tid, - __entry->unc_name, __entry->rc) + __get_str(name), __entry->rc) ) #define DEFINE_SMB3_TCON_EVENT(name) \ -- cgit From 8c11a607d1d9cd6e7f01fd6b03923597fb0ef95a Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 22 Mar 2019 22:31:17 -0500 Subject: SMB3: Fix SMB3.1.1 guest mounts to Samba Workaround problem with Samba responses to SMB3.1.1 null user (guest) mounts. The server doesn't set the expected flag in the session setup response so we have to do a similar check to what is done in smb3_validate_negotiate where we also check if the user is a null user (but not sec=krb5 since username might not be passed in on mount for Kerberos case). Note that the commit below tightened the conditions and forced signing for the SMB2-TreeConnect commands as per MS-SMB2. However, this should only apply to normal user sessions and not for cases where there is no user (even if server forgets to set the flag in the response) since we don't have anything useful to sign with. This is especially important now that the more secure SMB3.1.1 protocol is in the default dialect list. An earlier patch ("cifs: allow guest mounts to work for smb3.11") fixed the guest mounts to Windows. Fixes: 6188f28bf608 ("Tree connect for SMB3.1.1 must be signed for non-encrypted shares") Reviewed-by: Ronnie Sahlberg Reviewed-by: Paulo Alcantara CC: Stable Signed-off-by: Steve French --- fs/cifs/smb2pdu.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 8e4a1da95418..21ac19ff19cb 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -1631,10 +1631,13 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, /* * 3.11 tcon req must be signed if not encrypted. See MS-SMB2 3.2.4.1.1 * unless it is guest or anonymous user. See MS-SMB2 3.2.5.3.1 + * (Samba servers don't always set the flag so also check if null user) */ if ((ses->server->dialect == SMB311_PROT_ID) && !smb3_encryption_required(tcon) && - !(ses->session_flags & (SMB2_SESSION_FLAG_IS_GUEST|SMB2_SESSION_FLAG_IS_NULL))) + !(ses->session_flags & + (SMB2_SESSION_FLAG_IS_GUEST|SMB2_SESSION_FLAG_IS_NULL)) && + ((ses->user_name != NULL) || (ses->sectype == Kerberos))) req->sync_hdr.Flags |= SMB2_FLAGS_SIGNED; memset(&rqst, 0, sizeof(struct smb_rqst)); -- cgit From cf7d624f8dcc9b833a8489208b6ef6dcc5dd308b Mon Sep 17 00:00:00 2001 From: Steve French Date: Wed, 20 Mar 2019 16:42:50 -0500 Subject: cifs: update internal module version number To 2.19 Signed-off-by: Steve French --- fs/cifs/cifsfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 142164ef1f05..5c0298b9998f 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -150,5 +150,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "2.18" +#define CIFS_VERSION "2.19" #endif /* _CIFSFS_H */ -- cgit From ffc8599aa9763f39f6736a79da4d1575e7006f9a Mon Sep 17 00:00:00 2001 From: Kairui Song Date: Fri, 8 Mar 2019 11:05:08 +0800 Subject: x86/gart: Exclude GART aperture from kcore On machines where the GART aperture is mapped over physical RAM, /proc/kcore contains the GART aperture range. Accessing the GART range via /proc/kcore results in a kernel crash. vmcore used to have the same issue, until it was fixed with commit 2a3e83c6f96c ("x86/gart: Exclude GART aperture from vmcore")', leveraging existing hook infrastructure in vmcore to let /proc/vmcore return zeroes when attempting to read the aperture region, and so it won't read from the actual memory. Apply the same workaround for kcore. First implement the same hook infrastructure for kcore, then reuse the hook functions introduced in the previous vmcore fix. Just with some minor adjustment, rename some functions for more general usage, and simplify the hook infrastructure a bit as there is no module usage yet. Suggested-by: Baoquan He Signed-off-by: Kairui Song Signed-off-by: Thomas Gleixner Reviewed-by: Jiri Bohac Acked-by: Baoquan He Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Alexey Dobriyan Cc: Andrew Morton Cc: Omar Sandoval Cc: Dave Young Link: https://lkml.kernel.org/r/20190308030508.13548-1-kasong@redhat.com --- arch/x86/kernel/aperture_64.c | 20 +++++++++++++------- fs/proc/kcore.c | 27 +++++++++++++++++++++++++++ include/linux/kcore.h | 2 ++ 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 58176b56354e..294ed4392a0e 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -14,6 +14,7 @@ #define pr_fmt(fmt) "AGP: " fmt #include +#include #include #include #include @@ -57,7 +58,7 @@ int fallback_aper_force __initdata; int fix_aperture __initdata = 1; -#ifdef CONFIG_PROC_VMCORE +#if defined(CONFIG_PROC_VMCORE) || defined(CONFIG_PROC_KCORE) /* * If the first kernel maps the aperture over e820 RAM, the kdump kernel will * use the same range because it will remain configured in the northbridge. @@ -66,20 +67,25 @@ int fix_aperture __initdata = 1; */ static unsigned long aperture_pfn_start, aperture_page_count; -static int gart_oldmem_pfn_is_ram(unsigned long pfn) +static int gart_mem_pfn_is_ram(unsigned long pfn) { return likely((pfn < aperture_pfn_start) || (pfn >= aperture_pfn_start + aperture_page_count)); } -static void exclude_from_vmcore(u64 aper_base, u32 aper_order) +static void __init exclude_from_core(u64 aper_base, u32 aper_order) { aperture_pfn_start = aper_base >> PAGE_SHIFT; aperture_page_count = (32 * 1024 * 1024) << aper_order >> PAGE_SHIFT; - WARN_ON(register_oldmem_pfn_is_ram(&gart_oldmem_pfn_is_ram)); +#ifdef CONFIG_PROC_VMCORE + WARN_ON(register_oldmem_pfn_is_ram(&gart_mem_pfn_is_ram)); +#endif +#ifdef CONFIG_PROC_KCORE + WARN_ON(register_mem_pfn_is_ram(&gart_mem_pfn_is_ram)); +#endif } #else -static void exclude_from_vmcore(u64 aper_base, u32 aper_order) +static void exclude_from_core(u64 aper_base, u32 aper_order) { } #endif @@ -474,7 +480,7 @@ out: * may have allocated the range over its e820 RAM * and fixed up the northbridge */ - exclude_from_vmcore(last_aper_base, last_aper_order); + exclude_from_core(last_aper_base, last_aper_order); return 1; } @@ -520,7 +526,7 @@ out: * overlap with the first kernel's memory. We can't access the * range through vmcore even though it should be part of the dump. */ - exclude_from_vmcore(aper_alloc, aper_order); + exclude_from_core(aper_alloc, aper_order); /* Fix up the north bridges */ for (i = 0; i < amd_nb_bus_dev_ranges[i].dev_limit; i++) { diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index bbcc185062bb..d29d869abec1 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -54,6 +54,28 @@ static LIST_HEAD(kclist_head); static DECLARE_RWSEM(kclist_lock); static int kcore_need_update = 1; +/* + * Returns > 0 for RAM pages, 0 for non-RAM pages, < 0 on error + * Same as oldmem_pfn_is_ram in vmcore + */ +static int (*mem_pfn_is_ram)(unsigned long pfn); + +int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn)) +{ + if (mem_pfn_is_ram) + return -EBUSY; + mem_pfn_is_ram = fn; + return 0; +} + +static int pfn_is_ram(unsigned long pfn) +{ + if (mem_pfn_is_ram) + return mem_pfn_is_ram(pfn); + else + return 1; +} + /* This doesn't grab kclist_lock, so it should only be used at init time. */ void __init kclist_add(struct kcore_list *new, void *addr, size_t size, int type) @@ -465,6 +487,11 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) goto out; } m = NULL; /* skip the list anchor */ + } else if (!pfn_is_ram(__pa(start) >> PAGE_SHIFT)) { + if (clear_user(buffer, tsz)) { + ret = -EFAULT; + goto out; + } } else if (m->type == KCORE_VMALLOC) { vread(buf, (char *)start, tsz); /* we have to zero-fill user buffer even if no read */ diff --git a/include/linux/kcore.h b/include/linux/kcore.h index 8c3f8c14eeaa..c843f4a9c512 100644 --- a/include/linux/kcore.h +++ b/include/linux/kcore.h @@ -44,6 +44,8 @@ void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz) m->vaddr = (unsigned long)vaddr; kclist_add(m, addr, sz, KCORE_REMAP); } + +extern int __init register_mem_pfn_is_ram(int (*fn)(unsigned long pfn)); #else static inline void kclist_add(struct kcore_list *new, void *addr, size_t size, int type) -- cgit From 32d0be018f6f5ee2d5d19c4795304613560814cf Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Fri, 22 Mar 2019 14:54:11 -0700 Subject: clocksource/drivers/riscv: Fix clocksource mask For all riscv architectures (RV32, RV64 and RV128), the clocksource is a 64 bit incrementing counter. Fix the clock source mask accordingly. Tested on both 64bit and 32 bit virt machine in QEMU. Fixes: 62b019436814 ("clocksource: new RISC-V SBI timer driver") Signed-off-by: Atish Patra Signed-off-by: Thomas Gleixner Reviewed-by: Anup Patel Cc: Albert Ou Cc: Daniel Lezcano Cc: linux-riscv@lists.infradead.org Cc: Palmer Dabbelt Cc: Anup Patel Cc: Damien Le Moal Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190322215411.19362-1-atish.patra@wdc.com --- drivers/clocksource/timer-riscv.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/clocksource/timer-riscv.c b/drivers/clocksource/timer-riscv.c index e8163693e936..5e6038fbf115 100644 --- a/drivers/clocksource/timer-riscv.c +++ b/drivers/clocksource/timer-riscv.c @@ -58,7 +58,7 @@ static u64 riscv_sched_clock(void) static DEFINE_PER_CPU(struct clocksource, riscv_clocksource) = { .name = "riscv_clocksource", .rating = 300, - .mask = CLOCKSOURCE_MASK(BITS_PER_LONG), + .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS, .read = riscv_clocksource_rdtime, }; @@ -120,8 +120,7 @@ static int __init riscv_timer_init_dt(struct device_node *n) return error; } - sched_clock_register(riscv_sched_clock, - BITS_PER_LONG, riscv_timebase); + sched_clock_register(riscv_sched_clock, 64, riscv_timebase); error = cpuhp_setup_state(CPUHP_AP_RISCV_TIMER_STARTING, "clockevents/riscv/timer:starting", -- cgit From 93417a3fda2060f2a34e3341904024c5b6980d1f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 28 Feb 2019 15:37:14 -0600 Subject: genirq: Mark expected switch case fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. With -Wimplicit-fallthrough added to CFLAGS: kernel/irq/manage.c: In function ‘irq_do_set_affinity’: kernel/irq/manage.c:198:3: warning: this statement may fall through [-Wimplicit-fallthrough=] cpumask_copy(desc->irq_common_data.affinity, mask); ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ kernel/irq/manage.c:199:2: note: here case IRQ_SET_MASK_OK_NOCOPY: ^~~~ Annotate it. Signed-off-by: Gustavo A. R. Silva Signed-off-by: Thomas Gleixner Cc: Kees Cook Link: https://lkml.kernel.org/r/20190228213714.GA9246@embeddedor --- kernel/irq/manage.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 9ec34a2a6638..1401afa0d58a 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -196,6 +196,7 @@ int irq_do_set_affinity(struct irq_data *data, const struct cpumask *mask, case IRQ_SET_MASK_OK: case IRQ_SET_MASK_OK_DONE: cpumask_copy(desc->irq_common_data.affinity, mask); + /* fall through */ case IRQ_SET_MASK_OK_NOCOPY: irq_validate_effective_affinity(data); irq_set_thread_affinity(desc); -- cgit From 674a2b27234d1b7afcb0a9162e81b2e53aeef217 Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Sat, 23 Mar 2019 11:43:05 -0400 Subject: ext4: brelse all indirect buffer in ext4_ind_remove_space() All indirect buffers get by ext4_find_shared() should be released no mater the branch should be freed or not. But now, we forget to release the lower depth indirect buffers when removing space from the same higher depth indirect block. It will lead to buffer leak and futher more, it may lead to quota information corruption when using old quota, consider the following case. - Create and mount an empty ext4 filesystem without extent and quota features, - quotacheck and enable the user & group quota, - Create some files and write some data to them, and then punch hole to some files of them, it may trigger the buffer leak problem mentioned above. - Disable quota and run quotacheck again, it will create two new aquota files and write the checked quota information to them, which probably may reuse the freed indirect block(the buffer and page cache was not freed) as data block. - Enable quota again, it will invoke vfs_load_quota_inode()->invalidate_bdev() to try to clean unused buffers and pagecache. Unfortunately, because of the buffer of quota data block is still referenced, quota code cannot read the up to date quota info from the device and lead to quota information corruption. This problem can be reproduced by xfstests generic/231 on ext3 file system or ext4 file system without extent and quota features. This patch fix this problem by releasing the missing indirect buffers, in ext4_ind_remove_space(). Reported-by: Hulk Robot Signed-off-by: zhangyi (F) Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@kernel.org --- fs/ext4/indirect.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index c2225f0d31b5..02c39524bda2 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -1390,10 +1390,14 @@ end_range: partial->p + 1, partial2->p, (chain+n-1) - partial); - BUFFER_TRACE(partial->bh, "call brelse"); - brelse(partial->bh); - BUFFER_TRACE(partial2->bh, "call brelse"); - brelse(partial2->bh); + while (partial > chain) { + BUFFER_TRACE(partial->bh, "call brelse"); + brelse(partial->bh); + } + while (partial2 > chain2) { + BUFFER_TRACE(partial2->bh, "call brelse"); + brelse(partial2->bh); + } return 0; } -- cgit From d84dd3fb82fa7a094de7f08f10610d55a70cf0ca Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 19 Mar 2019 11:24:54 -0400 Subject: SUNRPC: Don't let RPC_SOFTCONN tasks time out if the transport is connected If the transport is still connected, then we do want to allow RPC_SOFTCONN tasks to retry. They should time out if and only if the connection is broken. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 228970e6e52b..187d10443a15 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2311,6 +2311,15 @@ out_exit: rpc_exit(task, status); } +static bool +rpc_check_connected(const struct rpc_rqst *req) +{ + /* No allocated request or transport? return true */ + if (!req || !req->rq_xprt) + return true; + return xprt_connected(req->rq_xprt); +} + static void rpc_check_timeout(struct rpc_task *task) { @@ -2322,10 +2331,11 @@ rpc_check_timeout(struct rpc_task *task) dprintk("RPC: %5u call_timeout (major)\n", task->tk_pid); task->tk_timeouts++; - if (RPC_IS_SOFTCONN(task)) { + if (RPC_IS_SOFTCONN(task) && !rpc_check_connected(task->tk_rqstp)) { rpc_exit(task, -ETIMEDOUT); return; } + if (RPC_IS_SOFT(task)) { if (clnt->cl_chatty) { printk(KERN_NOTICE "%s: server %s not responding, timed out\n", -- cgit From 5e86bdda41534e17621d5a071b294943cae4376e Mon Sep 17 00:00:00 2001 From: "zhangyi (F)" Date: Sat, 23 Mar 2019 11:56:01 -0400 Subject: ext4: cleanup bh release code in ext4_ind_remove_space() Currently, we are releasing the indirect buffer where we are done with it in ext4_ind_remove_space(), so we can see the brelse() and BUFFER_TRACE() everywhere. It seems fragile and hard to read, and we may probably forget to release the buffer some day. This patch cleans up the code by putting of the code which releases the buffers to the end of the function. Signed-off-by: zhangyi (F) Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/ext4/indirect.c | 47 ++++++++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 02c39524bda2..2024d3fa5504 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -1222,6 +1222,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode, ext4_lblk_t offsets[4], offsets2[4]; Indirect chain[4], chain2[4]; Indirect *partial, *partial2; + Indirect *p = NULL, *p2 = NULL; ext4_lblk_t max_block; __le32 nr = 0, nr2 = 0; int n = 0, n2 = 0; @@ -1263,7 +1264,7 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode, } - partial = ext4_find_shared(inode, n, offsets, chain, &nr); + partial = p = ext4_find_shared(inode, n, offsets, chain, &nr); if (nr) { if (partial == chain) { /* Shared branch grows from the inode */ @@ -1288,13 +1289,11 @@ int ext4_ind_remove_space(handle_t *handle, struct inode *inode, partial->p + 1, (__le32 *)partial->bh->b_data+addr_per_block, (chain+n-1) - partial); - BUFFER_TRACE(partial->bh, "call brelse"); - brelse(partial->bh); partial--; } end_range: - partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); + partial2 = p2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); if (nr2) { if (partial2 == chain2) { /* @@ -1324,16 +1323,14 @@ end_range: (__le32 *)partial2->bh->b_data, partial2->p, (chain2+n2-1) - partial2); - BUFFER_TRACE(partial2->bh, "call brelse"); - brelse(partial2->bh); partial2--; } goto do_indirects; } /* Punch happened within the same level (n == n2) */ - partial = ext4_find_shared(inode, n, offsets, chain, &nr); - partial2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); + partial = p = ext4_find_shared(inode, n, offsets, chain, &nr); + partial2 = p2 = ext4_find_shared(inode, n2, offsets2, chain2, &nr2); /* Free top, but only if partial2 isn't its subtree. */ if (nr) { @@ -1390,15 +1387,7 @@ end_range: partial->p + 1, partial2->p, (chain+n-1) - partial); - while (partial > chain) { - BUFFER_TRACE(partial->bh, "call brelse"); - brelse(partial->bh); - } - while (partial2 > chain2) { - BUFFER_TRACE(partial2->bh, "call brelse"); - brelse(partial2->bh); - } - return 0; + goto cleanup; } /* @@ -1413,8 +1402,6 @@ end_range: partial->p + 1, (__le32 *)partial->bh->b_data+addr_per_block, (chain+n-1) - partial); - BUFFER_TRACE(partial->bh, "call brelse"); - brelse(partial->bh); partial--; } if (partial2 > chain2 && depth2 <= depth) { @@ -1422,11 +1409,21 @@ end_range: (__le32 *)partial2->bh->b_data, partial2->p, (chain2+n2-1) - partial2); - BUFFER_TRACE(partial2->bh, "call brelse"); - brelse(partial2->bh); partial2--; } } + +cleanup: + while (p && p > chain) { + BUFFER_TRACE(p->bh, "call brelse"); + brelse(p->bh); + p--; + } + while (p2 && p2 > chain2) { + BUFFER_TRACE(p2->bh, "call brelse"); + brelse(p2->bh); + p2--; + } return 0; do_indirects: @@ -1434,7 +1431,7 @@ do_indirects: switch (offsets[0]) { default: if (++n >= n2) - return 0; + break; nr = i_data[EXT4_IND_BLOCK]; if (nr) { ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); @@ -1443,7 +1440,7 @@ do_indirects: /* fall through */ case EXT4_IND_BLOCK: if (++n >= n2) - return 0; + break; nr = i_data[EXT4_DIND_BLOCK]; if (nr) { ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); @@ -1452,7 +1449,7 @@ do_indirects: /* fall through */ case EXT4_DIND_BLOCK: if (++n >= n2) - return 0; + break; nr = i_data[EXT4_TIND_BLOCK]; if (nr) { ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); @@ -1462,5 +1459,5 @@ do_indirects: case EXT4_TIND_BLOCK: ; } - return 0; + goto cleanup; } -- cgit From 5a698243930c441afccec04e4d5dc8febfd2b775 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 21 Mar 2019 17:57:56 -0400 Subject: NFS: Fix a typo in nfs_init_timeout_values() Specifying a retrans=0 mount parameter to a NFS/TCP mount, is inadvertently causing the NFS client to rewrite any specified timeout parameter to the default of 60 seconds. Fixes: a956beda19a6 ("NFS: Allow the mount option retrans=0") Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index fb1cf1a4bda2..90d71fda65ce 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -453,7 +453,7 @@ void nfs_init_timeout_values(struct rpc_timeout *to, int proto, case XPRT_TRANSPORT_RDMA: if (retrans == NFS_UNSPEC_RETRANS) to->to_retries = NFS_DEF_TCP_RETRANS; - if (timeo == NFS_UNSPEC_TIMEO || to->to_retries == 0) + if (timeo == NFS_UNSPEC_TIMEO || to->to_initval == 0) to->to_initval = NFS_DEF_TCP_TIMEO * HZ / 10; if (to->to_initval > NFS_MAX_TCP_TIMEOUT) to->to_initval = NFS_MAX_TCP_TIMEOUT; -- cgit From 166bd5b889ac61369c34650887a5c6b899f5e976 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 22 Mar 2019 23:03:56 -0400 Subject: pNFS/flexfiles: Fix layoutstats handling during read failovers During a read failover, we may end up changing the value of the pgio_mirror_idx, so make sure that we record the layout stats before that update. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index f9264e1922a2..6673d4ff5a2a 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1289,6 +1289,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, static int ff_layout_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) { + int new_idx = hdr->pgio_mirror_idx; int err; trace_nfs4_pnfs_read(hdr, task->tk_status); @@ -1307,7 +1308,7 @@ static int ff_layout_read_done_cb(struct rpc_task *task, case -NFS4ERR_RESET_TO_PNFS: if (ff_layout_choose_best_ds_for_read(hdr->lseg, hdr->pgio_mirror_idx + 1, - &hdr->pgio_mirror_idx)) + &new_idx)) goto out_layouterror; set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags); return task->tk_status; @@ -1320,7 +1321,9 @@ static int ff_layout_read_done_cb(struct rpc_task *task, return 0; out_layouterror: + ff_layout_read_record_layoutstats_done(task, hdr); ff_layout_send_layouterror(hdr->lseg); + hdr->pgio_mirror_idx = new_idx; out_eagain: rpc_restart_call_prepare(task); return -EAGAIN; -- cgit From 18915b5873f07e5030e6fb108a050fa7c71c59fb Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sat, 23 Mar 2019 12:10:29 -0400 Subject: ext4: prohibit fstrim in norecovery mode The ext4 fstrim implementation uses the block bitmaps to find free space that can be discarded. If we haven't replayed the journal, the bitmaps will be stale and we absolutely *cannot* use stale metadata to zap the underlying storage. Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o --- fs/ext4/ioctl.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index eb8ca8d80885..73435444b159 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1000,6 +1000,13 @@ resizefs_out: if (!blk_queue_discard(q)) return -EOPNOTSUPP; + /* + * We haven't replayed the journal, so we cannot use our + * block-bitmap-guided storage zapping commands. + */ + if (test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) + return -EROFS; + if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) return -EFAULT; -- cgit From a7fb107b7d8982ac76c958a0d2838a151b03e97e Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 21 Mar 2019 16:34:44 -0700 Subject: net: phy: Re-parent menus for MDIO bus drivers correctly After 90eff9096c01 ("net: phy: Allow splitting MDIO bus/device support from PHYs") the various MDIO bus drivers were no longer parented with config PHYLIB but with config MDIO_BUS which is not a menuconfig, fix this by depending on MDIO_DEVICE which is a menuconfig. This is visually nicer and less confusing for users. Fixes: 90eff9096c01 ("net: phy: Allow splitting MDIO bus/device support from PHYs") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/Kconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index 071869db44cf..520657945b82 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -7,6 +7,8 @@ menuconfig MDIO_DEVICE help MDIO devices and driver infrastructure code. +if MDIO_DEVICE + config MDIO_BUS tristate default m if PHYLIB=m @@ -179,6 +181,7 @@ config MDIO_XGENE APM X-Gene SoC's. endif +endif config PHYLINK tristate -- cgit From fa3a419d2f674b431d38748cb58fb7da17ee8949 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Fri, 22 Mar 2019 11:04:07 +0800 Subject: net: xilinx: fix possible object reference leak The call to of_parse_phandle returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./drivers/net/ethernet/xilinx/xilinx_axienet_main.c:1624:1-7: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 1569, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: Anirudha Sarangi Cc: John Linn Cc: "David S. Miller" Cc: Michal Simek Cc: netdev@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index ec7e7ec24ff9..4041c75997ba 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1575,12 +1575,14 @@ static int axienet_probe(struct platform_device *pdev) ret = of_address_to_resource(np, 0, &dmares); if (ret) { dev_err(&pdev->dev, "unable to get DMA resource\n"); + of_node_put(np); goto free_netdev; } lp->dma_regs = devm_ioremap_resource(&pdev->dev, &dmares); if (IS_ERR(lp->dma_regs)) { dev_err(&pdev->dev, "could not map DMA regs\n"); ret = PTR_ERR(lp->dma_regs); + of_node_put(np); goto free_netdev; } lp->rx_irq = irq_of_parse_and_map(np, 1); -- cgit From be693df3cf9dd113ff1d2c0d8150199efdba37f6 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Fri, 22 Mar 2019 11:04:08 +0800 Subject: net: ibm: fix possible object reference leak The call to ehea_get_eth_dn returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./drivers/net/ethernet/ibm/ehea/ehea_main.c:3163:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 3154, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: Douglas Miller Cc: "David S. Miller" Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ehea/ehea_main.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 3baabdc89726..90b62c1412c8 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -3160,6 +3160,7 @@ static ssize_t ehea_probe_port(struct device *dev, if (ehea_add_adapter_mr(adapter)) { pr_err("creating MR failed\n"); + of_node_put(eth_dn); return -EIO; } -- cgit From 75eac7b5f68b0a0671e795ac636457ee27cc11d8 Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Fri, 22 Mar 2019 11:04:09 +0800 Subject: net: ethernet: ti: fix possible object reference leak The call to of_get_child_by_name returns a node pointer with refcount incremented thus it must be explicitly decremented after the last usage. Detected by coccinelle with the following warnings: ./drivers/net/ethernet/ti/netcp_ethss.c:3661:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 3654, but without a corresponding object release within this function. ./drivers/net/ethernet/ti/netcp_ethss.c:3665:2-8: ERROR: missing of_node_put; acquired a node pointer with refcount incremented on line 3654, but without a corresponding object release within this function. Signed-off-by: Wen Yang Cc: Wingman Kwok Cc: Murali Karicheri Cc: "David S. Miller" Cc: netdev@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/netcp_ethss.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c index 5174d318901e..0a920c5936b2 100644 --- a/drivers/net/ethernet/ti/netcp_ethss.c +++ b/drivers/net/ethernet/ti/netcp_ethss.c @@ -3657,12 +3657,16 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev, ret = netcp_txpipe_init(&gbe_dev->tx_pipe, netcp_device, gbe_dev->dma_chan_name, gbe_dev->tx_queue_id); - if (ret) + if (ret) { + of_node_put(interfaces); return ret; + } ret = netcp_txpipe_open(&gbe_dev->tx_pipe); - if (ret) + if (ret) { + of_node_put(interfaces); return ret; + } /* Create network interfaces */ INIT_LIST_HEAD(&gbe_dev->gbe_intf_head); -- cgit From 23c78343ec36990709b636a9e02bad814f4384ad Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 22 Mar 2019 07:39:35 +0100 Subject: r8169: fix cable re-plugging issue Bartek reported that after few cable unplug/replug cycles suddenly replug isn't detected any longer. His system uses a RTL8106, I wasn't able to reproduce the issue with RTL8168g. According to his bisect the referenced commit caused the regression. As Realtek doesn't release datasheets or errata it's hard to say what's the actual root cause, but this change was reported to fix the issue. Fixes: 38caff5a445b ("r8169: handle all interrupt events in the hard irq handler") Reported-by: Bartosz Skrzypczak Suggested-by: Bartosz Skrzypczak Tested-by: Bartosz Skrzypczak Signed-off-by: Heiner Kallweit Signed-off-by: David S. Miller --- drivers/net/ethernet/realtek/r8169.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 9dd1cd2c0c68..7562ccbbb39a 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -6542,7 +6542,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance) set_bit(RTL_FLAG_TASK_RESET_PENDING, tp->wk.flags); } - if (status & RTL_EVENT_NAPI) { + if (status & (RTL_EVENT_NAPI | LinkChg)) { rtl_irq_disable(tp); napi_schedule_irqoff(&tp->napi); } -- cgit From 064c5d6881e897077639e04973de26440ee205e6 Mon Sep 17 00:00:00 2001 From: John Hurley Date: Fri, 22 Mar 2019 12:37:35 +0000 Subject: net: sched: fix cleanup NULL pointer exception in act_mirr A new mirred action is created by the tcf_mirred_init function. This contains a list head struct which is inserted into a global list on successful creation of a new action. However, after a creation, it is still possible to error out and call the tcf_idr_release function. This, in turn, calls the act_mirr cleanup function via __tcf_idr_release and __tcf_action_put. This cleanup function tries to delete the list entry which is as yet uninitialised, leading to a NULL pointer exception. Fix this by initialising the list entry on creation of a new action. Bug report: BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 PGD 8000000840c73067 P4D 8000000840c73067 PUD 858dcc067 PMD 0 Oops: 0002 [#1] SMP PTI CPU: 32 PID: 5636 Comm: handler194 Tainted: G OE 5.0.0+ #186 Hardware name: Dell Inc. PowerEdge R730/0599V5, BIOS 1.3.6 06/03/2015 RIP: 0010:tcf_mirred_release+0x42/0xa7 [act_mirred] Code: f0 90 39 c0 e8 52 04 57 c8 48 c7 c7 b8 80 39 c0 e8 94 fa d4 c7 48 8b 93 d0 00 00 00 48 8b 83 d8 00 00 00 48 c7 c7 f0 90 39 c0 <48> 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 83 d0 00 RSP: 0018:ffffac4aa059f688 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff9dcd1b214d00 RCX: 0000000000000000 RDX: 0000000000000000 RSI: ffff9dcd1fa165f8 RDI: ffffffffc03990f0 RBP: ffff9dccf9c7af80 R08: 0000000000000a3b R09: 0000000000000000 R10: ffff9dccfa11f420 R11: 0000000000000000 R12: 0000000000000001 R13: ffff9dcd16b433c0 R14: ffff9dcd1b214d80 R15: 0000000000000000 FS: 00007f441bfff700(0000) GS:ffff9dcd1fa00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 0000000839e64004 CR4: 00000000001606e0 Call Trace: tcf_action_cleanup+0x59/0xca __tcf_action_put+0x54/0x6b __tcf_idr_release.cold.33+0x9/0x12 tcf_mirred_init.cold.20+0x22e/0x3b0 [act_mirred] tcf_action_init_1+0x3d0/0x4c0 tcf_action_init+0x9c/0x130 tcf_exts_validate+0xab/0xc0 fl_change+0x1ca/0x982 [cls_flower] tc_new_tfilter+0x647/0x8d0 ? load_balance+0x14b/0x9e0 rtnetlink_rcv_msg+0xe3/0x370 ? __switch_to_asm+0x40/0x70 ? __switch_to_asm+0x34/0x70 ? _cond_resched+0x15/0x30 ? __kmalloc_node_track_caller+0x1d4/0x2b0 ? rtnl_calcit.isra.31+0xf0/0xf0 netlink_rcv_skb+0x49/0x110 netlink_unicast+0x16f/0x210 netlink_sendmsg+0x1df/0x390 sock_sendmsg+0x36/0x40 ___sys_sendmsg+0x27b/0x2c0 ? futex_wake+0x80/0x140 ? do_futex+0x2b9/0xac0 ? ep_scan_ready_list.constprop.22+0x1f2/0x210 ? ep_poll+0x7a/0x430 __sys_sendmsg+0x47/0x80 do_syscall_64+0x55/0x100 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 4e232818bd32 ("net: sched: act_mirred: remove dependency on rtnl lock") Signed-off-by: John Hurley Reviewed-by: Jakub Kicinski Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_mirred.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index cd712e4e8998..17cc6bd4c57c 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -159,12 +159,15 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, tcf_idr_release(*a, bind); return -EEXIST; } + + m = to_mirred(*a); + if (ret == ACT_P_CREATED) + INIT_LIST_HEAD(&m->tcfm_list); + err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack); if (err < 0) goto release_idr; - m = to_mirred(*a); - spin_lock_bh(&m->tcf_lock); if (parm->ifindex) { -- cgit From 737889efe9713a0f20a75fd0de952841d9275e6b Mon Sep 17 00:00:00 2001 From: Jon Maloy Date: Fri, 22 Mar 2019 15:03:51 +0100 Subject: tipc: tipc clang warning When checking the code with clang -Wsometimes-uninitialized we get the following warning: if (!tipc_link_is_establishing(l)) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ net/tipc/node.c:847:46: note: uninitialized use occurs here tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); net/tipc/node.c:831:2: note: remove the 'if' if its condition is always true if (!tipc_link_is_establishing(l)) { ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ net/tipc/node.c:821:31: note: initialize the variable 'maddr' to silence this warning struct tipc_media_addr *maddr; We fix this by initializing 'maddr' to NULL. For the matter of clarity, we also test if 'xmitq' is non-empty before we use it and 'maddr' further down in the function. It will never happen that 'xmitq' is non- empty at the same time as 'maddr' is NULL, so this is a sufficient test. Fixes: 598411d70f85 ("tipc: make resetting of links non-atomic") Reported-by: Nathan Chancellor Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/node.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 2dc4919ab23c..dd3b6dc17662 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -817,10 +817,10 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) { struct tipc_link_entry *le = &n->links[bearer_id]; + struct tipc_media_addr *maddr = NULL; struct tipc_link *l = le->link; - struct tipc_media_addr *maddr; - struct sk_buff_head xmitq; int old_bearer_id = bearer_id; + struct sk_buff_head xmitq; if (!l) return; @@ -844,7 +844,8 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) tipc_node_write_unlock(n); if (delete) tipc_mon_remove_peer(n->net, n->addr, old_bearer_id); - tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); + if (!skb_queue_empty(&xmitq)) + tipc_bearer_xmit(n->net, bearer_id, &xmitq, maddr); tipc_sk_rcv(n->net, &le->inputq); } -- cgit From 526949e877f44672d408bfe291e39860c13f2e24 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 22 Mar 2019 15:18:43 +0100 Subject: rxrpc: avoid clang -Wuninitialized warning clang produces a false-positive warning as it fails to notice that "lost = true" implies that "ret" is initialized: net/rxrpc/output.c:402:6: error: variable 'ret' is used uninitialized whenever 'if' condition is true [-Werror,-Wsometimes-uninitialized] if (lost) ^~~~ net/rxrpc/output.c:437:6: note: uninitialized use occurs here if (ret >= 0) { ^~~ net/rxrpc/output.c:402:2: note: remove the 'if' if its condition is always false if (lost) ^~~~~~~~~ net/rxrpc/output.c:339:9: note: initialize the variable 'ret' to silence this warning int ret, opt; ^ = 0 Rearrange the code to make that more obvious and avoid the warning. Signed-off-by: Arnd Bergmann Reviewed-by: Nathan Chancellor Signed-off-by: David S. Miller --- net/rxrpc/output.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 736aa9281100..004c762c2e8d 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -335,7 +335,6 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, struct kvec iov[2]; rxrpc_serial_t serial; size_t len; - bool lost = false; int ret, opt; _enter(",{%d}", skb->len); @@ -393,14 +392,14 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, static int lose; if ((lose++ & 7) == 7) { ret = 0; - lost = true; + trace_rxrpc_tx_data(call, sp->hdr.seq, serial, + whdr.flags, retrans, true); + goto done; } } - trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, - retrans, lost); - if (lost) - goto done; + trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, retrans, + false); /* send the packet with the don't fragment bit set if we currently * think it's small enough */ -- cgit From 2a6a8e2d9004b5303fcb494588ba3a3b87a256c3 Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Thu, 20 Dec 2018 14:16:26 +0300 Subject: clocksource/drivers/clps711x: Remove board support Since board support for the CLPS711X platform was removed, remove the board support from the clps711x-timer driver. Signed-off-by: Alexander Shiyan Signed-off-by: Thomas Gleixner Acked-by: Arnd Bergmann Cc: Daniel Lezcano Link: https://lkml.kernel.org/r/20181220111626.17140-1-shc_work@mail.ru --- drivers/clocksource/clps711x-timer.c | 45 +++++++++++------------------------- 1 file changed, 13 insertions(+), 32 deletions(-) diff --git a/drivers/clocksource/clps711x-timer.c b/drivers/clocksource/clps711x-timer.c index cdc251524f5e..857f8c086274 100644 --- a/drivers/clocksource/clps711x-timer.c +++ b/drivers/clocksource/clps711x-timer.c @@ -31,16 +31,9 @@ static u64 notrace clps711x_sched_clock_read(void) return ~readw(tcd); } -static int __init _clps711x_clksrc_init(struct clk *clock, void __iomem *base) +static void __init clps711x_clksrc_init(struct clk *clock, void __iomem *base) { - unsigned long rate; - - if (!base) - return -ENOMEM; - if (IS_ERR(clock)) - return PTR_ERR(clock); - - rate = clk_get_rate(clock); + unsigned long rate = clk_get_rate(clock); tcd = base; @@ -48,8 +41,6 @@ static int __init _clps711x_clksrc_init(struct clk *clock, void __iomem *base) clocksource_mmio_readw_down); sched_clock_register(clps711x_sched_clock_read, 16, rate); - - return 0; } static irqreturn_t clps711x_timer_interrupt(int irq, void *dev_id) @@ -67,13 +58,6 @@ static int __init _clps711x_clkevt_init(struct clk *clock, void __iomem *base, struct clock_event_device *clkevt; unsigned long rate; - if (!irq) - return -EINVAL; - if (!base) - return -ENOMEM; - if (IS_ERR(clock)) - return PTR_ERR(clock); - clkevt = kzalloc(sizeof(*clkevt), GFP_KERNEL); if (!clkevt) return -ENOMEM; @@ -93,32 +77,29 @@ static int __init _clps711x_clkevt_init(struct clk *clock, void __iomem *base, "clps711x-timer", clkevt); } -static void __init clps711x_clksrc_init(void __iomem *tc1_base, - void __iomem *tc2_base, - unsigned int irq) -{ - struct clk *tc1 = clk_get_sys("clps711x-timer.0", NULL); - struct clk *tc2 = clk_get_sys("clps711x-timer.1", NULL); - - BUG_ON(_clps711x_clksrc_init(tc1, tc1_base)); - BUG_ON(_clps711x_clkevt_init(tc2, tc2_base, irq)); -} - -#ifdef CONFIG_TIMER_OF static int __init clps711x_timer_init(struct device_node *np) { unsigned int irq = irq_of_parse_and_map(np, 0); struct clk *clock = of_clk_get(np, 0); void __iomem *base = of_iomap(np, 0); + if (!base) + return -ENOMEM; + if (!irq) + return -EINVAL; + if (IS_ERR(clock)) + return PTR_ERR(clock); + switch (of_alias_get_id(np, "timer")) { case CLPS711X_CLKSRC_CLOCKSOURCE: - return _clps711x_clksrc_init(clock, base); + clps711x_clksrc_init(clock, base); + break; case CLPS711X_CLKSRC_CLOCKEVENT: return _clps711x_clkevt_init(clock, base, irq); default: return -EINVAL; } + + return 0; } TIMER_OF_DECLARE(clps711x, "cirrus,ep7209-timer", clps711x_timer_init); -#endif -- cgit From 13f063815265c5397ee92d84436804bc9fb6b58b Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Sun, 24 Mar 2019 17:57:07 +0800 Subject: blk-mq: use blk_mq_put_driver_tag() to put tag Expect arguments, blk_mq_put_driver_tag_hctx() and blk_mq_put_driver_tag() is same. We can just use argument 'request' to put tag by blk_mq_put_driver_tag(). Then we can remove the unused blk_mq_put_driver_tag_hctx(). Signed-off-by: Yufen Yu Signed-off-by: Jens Axboe --- block/blk-flush.c | 4 ++-- block/blk-mq.h | 9 --------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/block/blk-flush.c b/block/blk-flush.c index 6e0f2d97fc6d..d95f94892015 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -220,7 +220,7 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq); flush_rq->tag = -1; } else { - blk_mq_put_driver_tag_hctx(hctx, flush_rq); + blk_mq_put_driver_tag(flush_rq); flush_rq->internal_tag = -1; } @@ -324,7 +324,7 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error) if (q->elevator) { WARN_ON(rq->tag < 0); - blk_mq_put_driver_tag_hctx(hctx, rq); + blk_mq_put_driver_tag(rq); } /* diff --git a/block/blk-mq.h b/block/blk-mq.h index 0ed8e5a8729f..d704fc7766f4 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -224,15 +224,6 @@ static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx, } } -static inline void blk_mq_put_driver_tag_hctx(struct blk_mq_hw_ctx *hctx, - struct request *rq) -{ - if (rq->tag == -1 || rq->internal_tag == -1) - return; - - __blk_mq_put_driver_tag(hctx, rq); -} - static inline void blk_mq_put_driver_tag(struct request *rq) { if (rq->tag == -1 || rq->internal_tag == -1) -- cgit From 85fae294e1a506b4213668716acb586bd6b4ae1e Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Sun, 24 Mar 2019 17:57:08 +0800 Subject: blk-mq: update comment for blk_mq_hctx_has_pending() For now, blk_mq_hctx_has_pending() checks any of ctx, hctx->dispatch or io scheduler have pending work. So, update the comment accordingly. Signed-off-by: Yufen Yu Signed-off-by: Jens Axboe --- block/blk-mq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 70b210a308c4..28080b0235f0 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -59,7 +59,8 @@ static int blk_mq_poll_stats_bkt(const struct request *rq) } /* - * Check if any of the ctx's have pending work in this hardware queue + * Check if any of the ctx, dispatch list or elevator + * have pending work in this hardware queue. */ static bool blk_mq_hctx_has_pending(struct blk_mq_hw_ctx *hctx) { -- cgit From 8c2ffd9174779014c3fe1f96d9dc3641d9175f00 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 24 Mar 2019 14:02:26 -0700 Subject: Linux 5.1-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 99c0530489ef..c0a34064c574 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Shy Crocodile # *DOCUMENTATION* -- cgit From 7f2daa96759b0700ad28579133aa91bc663632a7 Mon Sep 17 00:00:00 2001 From: Peng Hao Date: Sun, 10 Mar 2019 01:29:44 +0800 Subject: x86/resctrl: Remove unused variable Variable "struct rdt_resource *r" is set but not used. So remove it. Signed-off-by: Peng Hao Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/1552152584-26087-1-git-send-email-peng.hao2@zte.com.cn --- arch/x86/kernel/cpu/resctrl/monitor.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index f33f11f69078..1573a0a6b525 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -501,11 +501,8 @@ out_unlock: void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms) { unsigned long delay = msecs_to_jiffies(delay_ms); - struct rdt_resource *r; int cpu; - r = &rdt_resources_all[RDT_RESOURCE_L3]; - cpu = cpumask_any(&dom->cpu_mask); dom->cqm_work_cpu = cpu; -- cgit From b6a36e5ddf8496ec83a14589f32f58bbaf022046 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Fri, 15 Mar 2019 11:46:21 +1000 Subject: drm/fb: avoid setting 0 depth. If the downscaling fails and we end up with a best_depth of 0, then ignore it. This actually works around a cascade of failure, but it the simplest fix for now. The scaling patch broke the udl driver, as the udl driver doesn't expose planes at all, so gets the two default 32-bit formats, but the udl driver then ask for 16bpp fbdev, and the scaling code falls over. This fixes the udl driver since the scaled depth support was added. Fixes: f4bd542bcaee ("drm/fb-helper: Scale back depth to supported maximum") Cc: Daniel Vetter Reviewed-by: Linus Walleij Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20190315014621.21816-2-airlied@gmail.com --- drivers/gpu/drm/drm_fb_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 0e9349ff2d16..af2ab640cadb 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -1963,7 +1963,7 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper, best_depth = fmt->depth; } } - if (sizes.surface_depth != best_depth) { + if (sizes.surface_depth != best_depth && best_depth) { DRM_INFO("requested bpp %d, scaled depth down to %d", sizes.surface_bpp, best_depth); sizes.surface_depth = best_depth; -- cgit From 1d382264d911d91a8be5dbed1f0e053eb3245d81 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 23 Mar 2019 01:49:10 +0100 Subject: bpf, libbpf: fix version info and add it to shared object Even though libbpf's versioning script for the linker (libbpf.map) is pointing to 0.0.2, the BPF_EXTRAVERSION in the Makefile has not been updated along with it and is therefore still on 0.0.1. While fixing up, I also noticed that the generated shared object versioning information is missing, typical convention is to have a linker name (libbpf.so), soname (libbpf.so.0) and real name (libbpf.so.0.0.2) for library management. This is based upon the LIBBPF_VERSION as well. The build will then produce the following bpf libraries: # ll libbpf* libbpf.a libbpf.so -> libbpf.so.0.0.2 libbpf.so.0 -> libbpf.so.0.0.2 libbpf.so.0.0.2 # readelf -d libbpf.so.0.0.2 | grep SONAME 0x000000000000000e (SONAME) Library soname: [libbpf.so.0] And install them accordingly: # rm -rf /tmp/bld; mkdir /tmp/bld; make -j$(nproc) O=/tmp/bld install Auto-detecting system features: ... libelf: [ on ] ... bpf: [ on ] CC /tmp/bld/libbpf.o CC /tmp/bld/bpf.o CC /tmp/bld/nlattr.o CC /tmp/bld/btf.o CC /tmp/bld/libbpf_errno.o CC /tmp/bld/str_error.o CC /tmp/bld/netlink.o CC /tmp/bld/bpf_prog_linfo.o CC /tmp/bld/libbpf_probes.o CC /tmp/bld/xsk.o LD /tmp/bld/libbpf-in.o LINK /tmp/bld/libbpf.a LINK /tmp/bld/libbpf.so.0.0.2 LINK /tmp/bld/test_libbpf INSTALL /tmp/bld/libbpf.a INSTALL /tmp/bld/libbpf.so.0.0.2 # ll /usr/local/lib64/libbpf.* /usr/local/lib64/libbpf.a /usr/local/lib64/libbpf.so -> libbpf.so.0.0.2 /usr/local/lib64/libbpf.so.0 -> libbpf.so.0.0.2 /usr/local/lib64/libbpf.so.0.0.2 Fixes: 1bf4b05810fe ("tools: bpftool: add probes for eBPF program types") Fixes: 1b76c13e4b36 ("bpf tools: Introduce 'bpf' library and add bpf feature check") Reported-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/Makefile | 42 ++++++++++++++++++++++++++++-------------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 61aaacf0cfa1..5bf8e52c41fc 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -3,7 +3,7 @@ BPF_VERSION = 0 BPF_PATCHLEVEL = 0 -BPF_EXTRAVERSION = 1 +BPF_EXTRAVERSION = 2 MAKEFLAGS += --no-print-directory @@ -79,8 +79,6 @@ export prefix libdir src obj libdir_SQ = $(subst ','\'',$(libdir)) libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) -LIB_FILE = libbpf.a libbpf.so - VERSION = $(BPF_VERSION) PATCHLEVEL = $(BPF_PATCHLEVEL) EXTRAVERSION = $(BPF_EXTRAVERSION) @@ -88,7 +86,10 @@ EXTRAVERSION = $(BPF_EXTRAVERSION) OBJ = $@ N = -LIBBPF_VERSION = $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION) +LIBBPF_VERSION = $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION) + +LIB_TARGET = libbpf.a libbpf.so.$(LIBBPF_VERSION) +LIB_FILE = libbpf.a libbpf.so* # Set compile option CFLAGS ifdef EXTRA_CFLAGS @@ -128,16 +129,18 @@ all: export srctree OUTPUT CC LD CFLAGS V include $(srctree)/tools/build/Makefile.include -BPF_IN := $(OUTPUT)libbpf-in.o -LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) -VERSION_SCRIPT := libbpf.map +BPF_IN := $(OUTPUT)libbpf-in.o +VERSION_SCRIPT := libbpf.map + +LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET)) +LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN) | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {s++} END{print s}') VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) -CMD_TARGETS = $(LIB_FILE) +CMD_TARGETS = $(LIB_TARGET) CXX_TEST_TARGET = $(OUTPUT)test_libbpf @@ -170,9 +173,13 @@ $(BPF_IN): force elfdep bpfdep echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true $(Q)$(MAKE) $(build)=libbpf -$(OUTPUT)libbpf.so: $(BPF_IN) - $(QUIET_LINK)$(CC) --shared -Wl,--version-script=$(VERSION_SCRIPT) \ - $^ -o $@ +$(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) + +$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN) + $(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(VERSION) \ + -Wl,--version-script=$(VERSION_SCRIPT) $^ -o $@ + @ln -sf $(@F) $(OUTPUT)libbpf.so + @ln -sf $(@F) $(OUTPUT)libbpf.so.$(VERSION) $(OUTPUT)libbpf.a: $(BPF_IN) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ @@ -192,6 +199,12 @@ check_abi: $(OUTPUT)libbpf.so exit 1; \ fi +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + define do_install if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ @@ -200,8 +213,9 @@ define do_install endef install_lib: all_cmd - $(call QUIET_INSTALL, $(LIB_FILE)) \ - $(call do_install,$(LIB_FILE),$(libdir_SQ)) + $(call QUIET_INSTALL, $(LIB_TARGET)) \ + $(call do_install_mkdir,$(libdir_SQ)); \ + cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ) install_headers: $(call QUIET_INSTALL, headers) \ @@ -219,7 +233,7 @@ config-clean: clean: $(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \ - *.o *~ *.a *.so .*.d .*.cmd LIBBPF-CFLAGS + *.o *~ *.a *.so *.so.$(VERSION) .*.d .*.cmd LIBBPF-CFLAGS $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf -- cgit From 63197f78bca2d86093126783b0ee6519bd652435 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 23 Mar 2019 01:49:11 +0100 Subject: bpf, libbpf: clarify bump in libbpf version info The current documentation suggests that we would need to bump the libbpf version on every change. Lets clarify this a bit more and reflect what we do today in practice, that is, bumping it once per development cycle. Fixes: 76d1b894c515 ("libbpf: Document API and ABI conventions") Reported-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- tools/lib/bpf/README.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/lib/bpf/README.rst b/tools/lib/bpf/README.rst index 5788479384ca..cef7b77eab69 100644 --- a/tools/lib/bpf/README.rst +++ b/tools/lib/bpf/README.rst @@ -111,6 +111,7 @@ starting from ``0.0.1``. Every time ABI is being changed, e.g. because a new symbol is added or semantic of existing symbol is changed, ABI version should be bumped. +This bump in ABI version is at most once per kernel development cycle. For example, if current state of ``libbpf.map`` is: -- cgit From 3f04e0a6cfebf48152ac64502346cdc258811f79 Mon Sep 17 00:00:00 2001 From: Noralf Trønnes Date: Fri, 8 Feb 2019 15:01:02 +0100 Subject: drm: Fix drm_release() and device unplug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If userspace has open fd(s) when drm_dev_unplug() is run, it will result in drm_dev_unregister() being called twice. First in drm_dev_unplug() and then later in drm_release() through the call to drm_put_dev(). Since userspace already holds a ref on drm_device through the drm_minor, it's not necessary to add extra ref counting based on no open file handles. Instead just drm_dev_put() unconditionally in drm_dev_unplug(). We now have this: - Userpace holds a ref on drm_device as long as there's open fd(s) - The driver holds a ref on drm_device as long as it's bound to the struct device When both sides are done with drm_device, it is released. Signed-off-by: Noralf Trønnes Reviewed-by: Oleksandr Andrushchenko Reviewed-by: Daniel Vetter Reviewed-by: Sean Paul Signed-off-by: Dave Airlie Link: https://patchwork.freedesktop.org/patch/msgid/20190208140103.28919-2-noralf@tronnes.org --- drivers/gpu/drm/drm_drv.c | 6 +----- drivers/gpu/drm/drm_file.c | 6 ++---- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index 381581b01d48..05bbc2b622fc 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -376,11 +376,7 @@ void drm_dev_unplug(struct drm_device *dev) synchronize_srcu(&drm_unplug_srcu); drm_dev_unregister(dev); - - mutex_lock(&drm_global_mutex); - if (dev->open_count == 0) - drm_dev_put(dev); - mutex_unlock(&drm_global_mutex); + drm_dev_put(dev); } EXPORT_SYMBOL(drm_dev_unplug); diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index 83a5bbca6e7e..7caa3c7ed978 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -489,11 +489,9 @@ int drm_release(struct inode *inode, struct file *filp) drm_close_helper(filp); - if (!--dev->open_count) { + if (!--dev->open_count) drm_lastclose(dev); - if (drm_dev_is_unplugged(dev)) - drm_put_dev(dev); - } + mutex_unlock(&drm_global_mutex); drm_minor_release(minor); -- cgit From a51143001d9e0683ab6f7968a516fc9243527e44 Mon Sep 17 00:00:00 2001 From: Robert Tarasov Date: Thu, 14 Mar 2019 15:53:39 -0700 Subject: drm/udl: Refactor edid retrieving in UDL driver (v2) Now drm/udl driver uses drm_do_get_edid() function to retrieve and validate all blocks of EDID data. Old approach had insufficient validation routine and had problems with retrieving of extra blocks Signed-off-by: Robert Tarasov Reviewed-by: Jani Nikula Signed-off-by: Dave Airlie [airlied: Fix spelling mistakes] Link: https://patchwork.freedesktop.org/patch/msgid/20190314225339.162386-1-tutankhamen@chromium.org --- drivers/gpu/drm/udl/udl_connector.c | 72 ++++++------------------------------- 1 file changed, 11 insertions(+), 61 deletions(-) diff --git a/drivers/gpu/drm/udl/udl_connector.c b/drivers/gpu/drm/udl/udl_connector.c index 66885c24590f..c1bd5e3d9e4a 100644 --- a/drivers/gpu/drm/udl/udl_connector.c +++ b/drivers/gpu/drm/udl/udl_connector.c @@ -18,18 +18,19 @@ #include "udl_connector.h" #include "udl_drv.h" -static bool udl_get_edid_block(struct udl_device *udl, int block_idx, - u8 *buff) +static int udl_get_edid_block(void *data, u8 *buf, unsigned int block, + size_t len) { int ret, i; u8 *read_buff; + struct udl_device *udl = data; read_buff = kmalloc(2, GFP_KERNEL); if (!read_buff) - return false; + return -1; - for (i = 0; i < EDID_LENGTH; i++) { - int bval = (i + block_idx * EDID_LENGTH) << 8; + for (i = 0; i < len; i++) { + int bval = (i + block * EDID_LENGTH) << 8; ret = usb_control_msg(udl->udev, usb_rcvctrlpipe(udl->udev, 0), (0x02), (0x80 | (0x02 << 5)), bval, @@ -37,60 +38,13 @@ static bool udl_get_edid_block(struct udl_device *udl, int block_idx, if (ret < 1) { DRM_ERROR("Read EDID byte %d failed err %x\n", i, ret); kfree(read_buff); - return false; + return -1; } - buff[i] = read_buff[1]; + buf[i] = read_buff[1]; } kfree(read_buff); - return true; -} - -static bool udl_get_edid(struct udl_device *udl, u8 **result_buff, - int *result_buff_size) -{ - int i, extensions; - u8 *block_buff = NULL, *buff_ptr; - - block_buff = kmalloc(EDID_LENGTH, GFP_KERNEL); - if (block_buff == NULL) - return false; - - if (udl_get_edid_block(udl, 0, block_buff) && - memchr_inv(block_buff, 0, EDID_LENGTH)) { - extensions = ((struct edid *)block_buff)->extensions; - if (extensions > 0) { - /* we have to read all extensions one by one */ - *result_buff_size = EDID_LENGTH * (extensions + 1); - *result_buff = kmalloc(*result_buff_size, GFP_KERNEL); - buff_ptr = *result_buff; - if (buff_ptr == NULL) { - kfree(block_buff); - return false; - } - memcpy(buff_ptr, block_buff, EDID_LENGTH); - kfree(block_buff); - buff_ptr += EDID_LENGTH; - for (i = 1; i < extensions; ++i) { - if (udl_get_edid_block(udl, i, buff_ptr)) { - buff_ptr += EDID_LENGTH; - } else { - kfree(*result_buff); - *result_buff = NULL; - return false; - } - } - return true; - } - /* we have only base edid block */ - *result_buff = block_buff; - *result_buff_size = EDID_LENGTH; - return true; - } - - kfree(block_buff); - - return false; + return 0; } static int udl_get_modes(struct drm_connector *connector) @@ -122,8 +76,6 @@ static enum drm_mode_status udl_mode_valid(struct drm_connector *connector, static enum drm_connector_status udl_detect(struct drm_connector *connector, bool force) { - u8 *edid_buff = NULL; - int edid_buff_size = 0; struct udl_device *udl = connector->dev->dev_private; struct udl_drm_connector *udl_connector = container_of(connector, @@ -136,12 +88,10 @@ udl_detect(struct drm_connector *connector, bool force) udl_connector->edid = NULL; } - - if (!udl_get_edid(udl, &edid_buff, &edid_buff_size)) + udl_connector->edid = drm_do_get_edid(connector, udl_get_edid_block, udl); + if (!udl_connector->edid) return connector_status_disconnected; - udl_connector->edid = (struct edid *)edid_buff; - return connector_status_connected; } -- cgit From 6cf4511e9729c00a7306cf94085f9cc3c52ee723 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Sun, 24 Mar 2019 18:10:02 -0500 Subject: gpio: aspeed: fix a potential NULL pointer dereference In case devm_kzalloc, the patch returns ENOMEM to avoid potential NULL pointer dereference. Signed-off-by: Kangjie Lu Reviewed-by: Andrew Jeffery Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-aspeed.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/gpio/gpio-aspeed.c b/drivers/gpio/gpio-aspeed.c index 854bce4fb9e7..217507002dbc 100644 --- a/drivers/gpio/gpio-aspeed.c +++ b/drivers/gpio/gpio-aspeed.c @@ -1224,6 +1224,8 @@ static int __init aspeed_gpio_probe(struct platform_device *pdev) gpio->offset_timer = devm_kzalloc(&pdev->dev, gpio->chip.ngpio, GFP_KERNEL); + if (!gpio->offset_timer) + return -ENOMEM; return aspeed_gpio_setup_irqs(gpio, pdev); } -- cgit From aa9aaa4d61c0048d3faad056893cd7860bbc084c Mon Sep 17 00:00:00 2001 From: Erik Schmauss Date: Thu, 21 Mar 2019 18:20:21 -0700 Subject: ACPI: use different default debug value than ACPICA Rather than setting debug output flags during early init, its makes more sense to simply re-define ACPI_DEBUG_DEFAULT specifically for Linux. ACPICA commit 60903715711f4b00ca1831779a8a23279a66497d Link: https://github.com/acpica/acpica/commit/60903715 Fixes: ce5cbf53496b ("ACPI: Set debug output flags independent of ACPICA") Reported-by: Alexandru Gagniuc Tested-by: Alexandru Gagniuc Signed-off-by: Erik Schmauss Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki --- drivers/acpi/bus.c | 3 --- include/acpi/acoutput.h | 3 +++ include/acpi/platform/aclinux.h | 5 +++++ 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 6ecbbabf1233..eec263c9019e 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1043,9 +1043,6 @@ void __init acpi_early_init(void) acpi_permanent_mmap = true; - /* Initialize debug output. Linux does not use ACPICA defaults */ - acpi_dbg_level = ACPI_LV_INFO | ACPI_LV_REPAIR; - #ifdef CONFIG_X86 /* * If the machine falls into the DMI check table, diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h index 30b1ae53689f..c50542dc71e0 100644 --- a/include/acpi/acoutput.h +++ b/include/acpi/acoutput.h @@ -150,7 +150,10 @@ /* Defaults for debug_level, debug and normal */ +#ifndef ACPI_DEBUG_DEFAULT #define ACPI_DEBUG_DEFAULT (ACPI_LV_INIT | ACPI_LV_DEBUG_OBJECT | ACPI_LV_EVALUATION | ACPI_LV_REPAIR) +#endif + #define ACPI_NORMAL_DEFAULT (ACPI_LV_INIT | ACPI_LV_DEBUG_OBJECT | ACPI_LV_REPAIR) #define ACPI_DEBUG_ALL (ACPI_LV_AML_DISASSEMBLE | ACPI_LV_ALL_EXCEPTIONS | ACPI_LV_ALL) diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h index 9ff328fd946a..624b90b34085 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h @@ -82,6 +82,11 @@ #define ACPI_NO_ERROR_MESSAGES #undef ACPI_DEBUG_OUTPUT +/* Use a specific bugging default separate from ACPICA */ + +#undef ACPI_DEBUG_DEFAULT +#define ACPI_DEBUG_DEFAULT (ACPI_LV_INFO | ACPI_LV_REPAIR) + /* External interface for __KERNEL__, stub is needed */ #define ACPI_EXTERNAL_RETURN_STATUS(prototype) \ -- cgit From 776e78677f514ecddd12dba48b9040958999bd5a Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 22 Mar 2019 15:26:56 +0000 Subject: drm/meson: Fix invalid pointer in meson_drv_unbind() meson_drv_bind() registers a meson_drm struct as the device's privdata, but meson_drv_unbind() tries to retrieve a drm_device. This may cause a segfault on shutdown: [ 5194.593429] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000197 ... [ 5194.788850] Call trace: [ 5194.791349] drm_dev_unregister+0x1c/0x118 [drm] [ 5194.795848] meson_drv_unbind+0x50/0x78 [meson_drm] Retrieve the right pointer in meson_drv_unbind(). Fixes: bbbe775ec5b5 ("drm: Add support for Amlogic Meson Graphic Controller") Signed-off-by: Jean-Philippe Brucker Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20190322152657.13752-1-jean-philippe.brucker@arm.com --- drivers/gpu/drm/meson/meson_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 2281ed3eb774..7e85802c5398 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -356,8 +356,8 @@ static int meson_drv_bind(struct device *dev) static void meson_drv_unbind(struct device *dev) { - struct drm_device *drm = dev_get_drvdata(dev); - struct meson_drm *priv = drm->dev_private; + struct meson_drm *priv = dev_get_drvdata(dev); + struct drm_device *drm = priv->drm; if (priv->canvas) { meson_canvas_free(priv->canvas, priv->canvas_id_osd1); -- cgit From 2d8f92897ad816f5dda54b2ed2fd9f2d7cb1abde Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Fri, 22 Mar 2019 15:26:57 +0000 Subject: drm/meson: Uninstall IRQ handler meson_drv_unbind() doesn't unregister the IRQ handler, which can lead to use-after-free if the IRQ fires after unbind: [ 64.656876] Unable to handle kernel paging request at virtual address ffff000011706dbc ... [ 64.662001] pc : meson_irq+0x18/0x30 [meson_drm] I'm assuming that a similar problem could happen on the error path of bind(), so uninstall the IRQ handler there as well. Fixes: bbbe775ec5b5 ("drm: Add support for Amlogic Meson Graphic Controller") Signed-off-by: Jean-Philippe Brucker Acked-by: Neil Armstrong Signed-off-by: Neil Armstrong Link: https://patchwork.freedesktop.org/patch/msgid/20190322152657.13752-2-jean-philippe.brucker@arm.com --- drivers/gpu/drm/meson/meson_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c index 7e85802c5398..8a4ebcb6405c 100644 --- a/drivers/gpu/drm/meson/meson_drv.c +++ b/drivers/gpu/drm/meson/meson_drv.c @@ -337,12 +337,14 @@ static int meson_drv_bind_master(struct device *dev, bool has_components) ret = drm_dev_register(drm, 0); if (ret) - goto free_drm; + goto uninstall_irq; drm_fbdev_generic_setup(drm, 32); return 0; +uninstall_irq: + drm_irq_uninstall(drm); free_drm: drm_dev_put(drm); @@ -367,6 +369,7 @@ static void meson_drv_unbind(struct device *dev) } drm_dev_unregister(drm); + drm_irq_uninstall(drm); drm_kms_helper_poll_fini(drm); drm_mode_config_cleanup(drm); drm_dev_put(drm); -- cgit From 3d565a21f2ce1f37479e91914734478c39b5c6fc Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 20 Mar 2019 09:11:10 +0100 Subject: drm/meson: fix TMDS clock filtering for DMT monitors DMT monitors does not necessarely report a maximum TMDS clock in a VSDB EDID extension. In this case, all modes are wrongly rejected, including the DRM fallback EDID. This patch only rejects modes whith clock > max_tmds_clock if the max_tmds_clock is specified. This will only reject 4:2:0 HDMI2.0 modes, who reports a clock > max_tmds_clock. Reported-by: Maxime Jourdan Fixes: d7d8fb7046b6 ("drm/meson: add HDMI div40 TMDS mode") Signed-off-by: Neil Armstrong Tested-by: Maxime Jourdan Reviewed-by: Maxime Jourdan Link: https://patchwork.freedesktop.org/patch/msgid/20190320081110.1718-1-narmstrong@baylibre.com --- drivers/gpu/drm/meson/meson_dw_hdmi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/meson/meson_dw_hdmi.c b/drivers/gpu/drm/meson/meson_dw_hdmi.c index e28814f4ea6c..563953ec6ad0 100644 --- a/drivers/gpu/drm/meson/meson_dw_hdmi.c +++ b/drivers/gpu/drm/meson/meson_dw_hdmi.c @@ -569,7 +569,8 @@ dw_hdmi_mode_valid(struct drm_connector *connector, DRM_DEBUG_DRIVER("Modeline " DRM_MODE_FMT "\n", DRM_MODE_ARG(mode)); /* If sink max TMDS clock, we reject the mode */ - if (mode->clock > connector->display_info.max_tmds_clock) + if (connector->display_info.max_tmds_clock && + mode->clock > connector->display_info.max_tmds_clock) return MODE_BAD; /* Check against non-VIC supported modes */ -- cgit From d9470757398a700d9450a43508000bcfd010c7a4 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 22 Mar 2019 23:37:24 +1100 Subject: powerpc/64: Fix memcmp reading past the end of src/dest Chandan reported that fstests' generic/026 test hit a crash: BUG: Unable to handle kernel data access at 0xc00000062ac40000 Faulting instruction address: 0xc000000000092240 Oops: Kernel access of bad area, sig: 11 [#1] LE SMP NR_CPUS=2048 DEBUG_PAGEALLOC NUMA pSeries CPU: 0 PID: 27828 Comm: chacl Not tainted 5.0.0-rc2-next-20190115-00001-g6de6dba64dda #1 NIP: c000000000092240 LR: c00000000066a55c CTR: 0000000000000000 REGS: c00000062c0c3430 TRAP: 0300 Not tainted (5.0.0-rc2-next-20190115-00001-g6de6dba64dda) MSR: 8000000002009033 CR: 44000842 XER: 20000000 CFAR: 00007fff7f3108ac DAR: c00000062ac40000 DSISR: 40000000 IRQMASK: 0 GPR00: 0000000000000000 c00000062c0c36c0 c0000000017f4c00 c00000000121a660 GPR04: c00000062ac3fff9 0000000000000004 0000000000000020 00000000275b19c4 GPR08: 000000000000000c 46494c4500000000 5347495f41434c5f c0000000026073a0 GPR12: 0000000000000000 c0000000027a0000 0000000000000000 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: c00000062ea70020 c00000062c0c38d0 0000000000000002 0000000000000002 GPR24: c00000062ac3ffe8 00000000275b19c4 0000000000000001 c00000062ac30000 GPR28: c00000062c0c38d0 c00000062ac30050 c00000062ac30058 0000000000000000 NIP memcmp+0x120/0x690 LR xfs_attr3_leaf_lookup_int+0x53c/0x5b0 Call Trace: xfs_attr3_leaf_lookup_int+0x78/0x5b0 (unreliable) xfs_da3_node_lookup_int+0x32c/0x5a0 xfs_attr_node_addname+0x170/0x6b0 xfs_attr_set+0x2ac/0x340 __xfs_set_acl+0xf0/0x230 xfs_set_acl+0xd0/0x160 set_posix_acl+0xc0/0x130 posix_acl_xattr_set+0x68/0x110 __vfs_setxattr+0xa4/0x110 __vfs_setxattr_noperm+0xac/0x240 vfs_setxattr+0x128/0x130 setxattr+0x248/0x600 path_setxattr+0x108/0x120 sys_setxattr+0x28/0x40 system_call+0x5c/0x70 Instruction dump: 7d201c28 7d402428 7c295040 38630008 38840008 408201f0 4200ffe8 2c050000 4182ff6c 20c50008 54c61838 7d201c28 <7d402428> 7d293436 7d4a3436 7c295040 The instruction dump decodes as: subfic r6,r5,8 rlwinm r6,r6,3,0,28 ldbrx r9,0,r3 ldbrx r10,0,r4 <- Which shows us doing an 8 byte load from c00000062ac3fff9, which crosses the page boundary at c00000062ac40000 and faults. It's not OK for memcmp to read past the end of the source or destination buffers if that would cross a page boundary, because we don't know that the next page is mapped. As pointed out by Segher, we can read past the end of the source or destination as long as we don't cross a 4K boundary, because that's our minimum page size on all platforms. The bug is in the code at the .Lcmp_rest_lt8bytes label. When we get there we know that s1 is 8-byte aligned and we have at least 1 byte to read, so a single 8-byte load won't read past the end of s1 and cross a page boundary. But we have to be more careful with s2. So check if it's within 8 bytes of a 4K boundary and if so go to the byte-by-byte loop. Fixes: 2d9ee327adce ("powerpc/64: Align bytes before fall back to .Lshort in powerpc64 memcmp()") Cc: stable@vger.kernel.org # v4.19+ Reported-by: Chandan Rajendra Signed-off-by: Michael Ellerman Reviewed-by: Segher Boessenkool Tested-by: Chandan Rajendra Signed-off-by: Michael Ellerman --- arch/powerpc/lib/memcmp_64.S | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S index 844d8e774492..b7f6f6e0b6e8 100644 --- a/arch/powerpc/lib/memcmp_64.S +++ b/arch/powerpc/lib/memcmp_64.S @@ -215,11 +215,20 @@ _GLOBAL_TOC(memcmp) beq .Lzero .Lcmp_rest_lt8bytes: - /* Here we have only less than 8 bytes to compare with. at least s1 - * Address is aligned with 8 bytes. - * The next double words are load and shift right with appropriate - * bits. + /* + * Here we have less than 8 bytes to compare. At least s1 is aligned to + * 8 bytes, but s2 may not be. We must make sure s2 + 7 doesn't cross a + * page boundary, otherwise we might read past the end of the buffer and + * trigger a page fault. We use 4K as the conservative minimum page + * size. If we detect that case we go to the byte-by-byte loop. + * + * Otherwise the next double word is loaded from s1 and s2, and shifted + * right to compare the appropriate bits. */ + clrldi r6,r4,(64-12) // r6 = r4 & 0xfff + cmpdi r6,0xff8 + bgt .Lshort + subfic r6,r5,8 slwi r6,r6,3 LD rA,0,r3 -- cgit From 945ab8f6de94430c23a82f3cf2e3f6d6f2945ff7 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 25 Mar 2019 08:15:14 -0400 Subject: locks: wake any locks blocked on request before deadlock check Andreas reported that he was seeing the tdbtorture test fail in some cases with -EDEADLCK when it wasn't before. Some debugging showed that deadlock detection was sometimes discovering the caller's lock request itself in a dependency chain. While we remove the request from the blocked_lock_hash prior to reattempting to acquire it, any locks that are blocked on that request will still be present in the hash and will still have their fl_blocker pointer set to the current request. This causes posix_locks_deadlock to find a deadlock dependency chain when it shouldn't, as a lock request cannot block itself. We are going to end up waking all of those blocked locks anyway when we go to reinsert the request back into the blocked_lock_hash, so just do it prior to checking for deadlocks. This ensures that any lock blocked on the current request will no longer be part of any blocked request chain. URL: https://bugzilla.kernel.org/show_bug.cgi?id=202975 Fixes: 5946c4319ebb ("fs/locks: allow a lock request to block other requests.") Cc: stable@vger.kernel.org Reported-by: Andreas Schneider Signed-off-by: Neil Brown Signed-off-by: Jeff Layton --- fs/locks.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/locks.c b/fs/locks.c index eaa1cfaf73b0..71d0c6c2aac5 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1160,6 +1160,11 @@ static int posix_lock_inode(struct inode *inode, struct file_lock *request, */ error = -EDEADLK; spin_lock(&blocked_lock_lock); + /* + * Ensure that we don't find any locks blocked on this + * request during deadlock detection. + */ + __locks_wake_up_blocks(request); if (likely(!posix_locks_deadlock(request, fl))) { error = FILE_LOCK_DEFERRED; __locks_insert_block(fl, request, -- cgit From 8bc32a285660e13fdcf92ddaf5b8653abe112040 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 22 Mar 2019 16:52:17 +0100 Subject: iommu: Don't print warning when IOMMU driver only supports unmanaged domains Print the warning about the fall-back to IOMMU_DOMAIN_DMA in iommu_group_get_for_dev() only when such a domain was actually allocated. Otherwise the user will get misleading warnings in the kernel log when the iommu driver used doesn't support IOMMU_DOMAIN_DMA and IOMMU_DOMAIN_IDENTITY. Fixes: fccb4e3b8ab09 ('iommu: Allow default domain type to be set on the kernel command line') Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 33a982e33716..109de67d5d72 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1105,10 +1105,12 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev) dom = __iommu_domain_alloc(dev->bus, iommu_def_domain_type); if (!dom && iommu_def_domain_type != IOMMU_DOMAIN_DMA) { - dev_warn(dev, - "failed to allocate default IOMMU domain of type %u; falling back to IOMMU_DOMAIN_DMA", - iommu_def_domain_type); dom = __iommu_domain_alloc(dev->bus, IOMMU_DOMAIN_DMA); + if (dom) { + dev_warn(dev, + "failed to allocate default IOMMU domain of type %u; falling back to IOMMU_DOMAIN_DMA", + iommu_def_domain_type); + } } group->default_domain = dom; -- cgit From ed79dac98c5e9f8471456afe2cc09a3912586b52 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Fri, 22 Mar 2019 18:10:22 -0700 Subject: xfs: prohibit fstrim in norecovery mode The xfs fstrim implementation uses the free space btrees to find free space that can be discarded. If we haven't recovered the log, the bnobt will be stale and we absolutely *cannot* use stale metadata to zap the underlying storage. Signed-off-by: Darrick J. Wong Reviewed-by: Eric Sandeen --- fs/xfs/xfs_discard.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 93f07edafd81..9ee2a7d02e70 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -161,6 +161,14 @@ xfs_ioc_trim( return -EPERM; if (!blk_queue_discard(q)) return -EOPNOTSUPP; + + /* + * We haven't recovered the log, so we cannot use our bnobt-guided + * storage zapping commands. + */ + if (mp->m_flags & XFS_MOUNT_NORECOVERY) + return -EROFS; + if (copy_from_user(&range, urange, sizeof(range))) return -EFAULT; -- cgit From 113ce08109f8e3b091399e7cc32486df1cff48e7 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Mon, 25 Mar 2019 10:38:58 +0100 Subject: ALSA: pcm: Don't suspend stream in unrecoverable PCM state Currently PCM core sets each opened stream forcibly to SUSPENDED state via snd_pcm_suspend_all() call, and the user-space is responsible for re-triggering the resume manually either via snd_pcm_resume() or prepare call. The scheme works fine usually, but there are corner cases where the stream can't be resumed by that call: the streams still in OPEN state before finishing hw_params. When they are suspended, user-space cannot perform resume or prepare because they haven't been set up yet. The only possible recovery is to re-open the device, which isn't nice at all. Similarly, when a stream is in DISCONNECTED state, it makes no sense to change it to SUSPENDED state. Ditto for in SETUP state; which you can re-prepare directly. So, this patch addresses these issues by filtering the PCM streams to be suspended by checking the PCM state. When a stream is in either OPEN, SETUP or DISCONNECTED as well as already SUSPENDED, the suspend action is skipped. To be noted, this problem was originally reported for the PCM runtime PM on HD-audio. And, the runtime PM problem itself was already addressed (although not intended) by the code refactoring commits 3d21ef0b49f8 ("ALSA: pcm: Suspend streams globally via device type PM ops") and 17bc4815de58 ("ALSA: pci: Remove superfluous snd_pcm_suspend*() calls"). These commits eliminated the snd_pcm_suspend*() calls from the runtime PM suspend callback code path, hence the racy OPEN state won't appear while runtime PM. (FWIW, the race window is between snd_pcm_open_substream() and the first power up in azx_pcm_open().) Although the runtime PM issue was already "fixed", the same problem is still present for the system PM, hence this patch is still needed. And for stable trees, this patch alone should suffice for fixing the runtime PM problem, too. Reported-and-tested-by: Jon Hunter Cc: Signed-off-by: Takashi Iwai --- sound/core/pcm_native.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index f731f904e8cc..1d8452912b14 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -1445,8 +1445,15 @@ static int snd_pcm_pause(struct snd_pcm_substream *substream, int push) static int snd_pcm_pre_suspend(struct snd_pcm_substream *substream, int state) { struct snd_pcm_runtime *runtime = substream->runtime; - if (runtime->status->state == SNDRV_PCM_STATE_SUSPENDED) + switch (runtime->status->state) { + case SNDRV_PCM_STATE_SUSPENDED: return -EBUSY; + /* unresumable PCM state; return -EBUSY for skipping suspend */ + case SNDRV_PCM_STATE_OPEN: + case SNDRV_PCM_STATE_SETUP: + case SNDRV_PCM_STATE_DISCONNECTED: + return -EBUSY; + } runtime->trigger_master = substream; return 0; } -- cgit From 2dbed152e2d4c3fe2442284918d14797898b1e8a Mon Sep 17 00:00:00 2001 From: Sekhar Nori Date: Wed, 20 Feb 2019 16:36:52 +0530 Subject: ARM: davinci: fix build failure with allnoconfig allnoconfig build with just ARCH_DAVINCI enabled fails because drivers/clk/davinci/* depends on REGMAP being enabled. Fix it by selecting REGMAP_MMIO when building in DaVinci support. Signed-off-by: Sekhar Nori Reviewed-by: David Lechner Signed-off-by: Arnd Bergmann --- arch/arm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 054ead960f98..850b4805e2d1 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -596,6 +596,7 @@ config ARCH_DAVINCI select HAVE_IDE select PM_GENERIC_DOMAINS if PM select PM_GENERIC_DOMAINS_OF if PM && OF + select REGMAP_MMIO select RESET_CONTROLLER select SPARSE_IRQ select USE_OF -- cgit From fa9463564e77067df81b0b8dec91adbbbc47bfb4 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 18 Mar 2019 16:31:22 +0100 Subject: ARM: dts: nomadik: Fix polarity of SPI CS The SPI DT bindings are for historical reasons a pitfall, the ability to flag a GPIO line as active high/low with the second cell flags was introduced later so the SPI subsystem will only accept the bool flag spi-cs-high to indicate that the line is active high. It worked by mistake, but the mistake was corrected in another commit. The comment in the DTS file was also misleading: this CS is indeed active high. Fixes: cffbb02dafa3 ("ARM: dts: nomadik: Augment NHK15 panel setting") Signed-off-by: Linus Walleij Signed-off-by: Arnd Bergmann --- arch/arm/boot/dts/ste-nomadik-nhk15.dts | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm/boot/dts/ste-nomadik-nhk15.dts b/arch/arm/boot/dts/ste-nomadik-nhk15.dts index 04066f9cb8a3..f2f6558a00f1 100644 --- a/arch/arm/boot/dts/ste-nomadik-nhk15.dts +++ b/arch/arm/boot/dts/ste-nomadik-nhk15.dts @@ -213,12 +213,13 @@ gpio-sck = <&gpio0 5 GPIO_ACTIVE_HIGH>; gpio-mosi = <&gpio0 4 GPIO_ACTIVE_HIGH>; /* - * It's not actually active high, but the frameworks assume - * the polarity of the passed-in GPIO is "normal" (active - * high) then actively drives the line low to select the - * chip. + * This chipselect is active high. Just setting the flags + * to GPIO_ACTIVE_HIGH is not enough for the SPI DT bindings, + * it will be ignored, only the special "spi-cs-high" flag + * really counts. */ cs-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>; + spi-cs-high; num-chipselects = <1>; /* -- cgit From 9e75ad5d8f399a21c86271571aa630dd080223e2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Mar 2019 15:34:53 +0100 Subject: io_uring: fix big-endian compat signal mask handling On big-endian architectures, the signal masks are differnet between 32-bit and 64-bit tasks, so we have to use a different function for reading them from user space. io_cqring_wait() initially got this wrong, and always interprets this as a native structure. This is ok on x86 and most arm64, but not on s390, ppc64be, mips64be, sparc64 and parisc. Signed-off-by: Arnd Bergmann Signed-off-by: Jens Axboe --- fs/io_uring.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 6aaa30580a2b..8f48d29abf76 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1968,7 +1968,15 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, return 0; if (sig) { - ret = set_user_sigmask(sig, &ksigmask, &sigsaved, sigsz); +#ifdef CONFIG_COMPAT + if (in_compat_syscall()) + ret = set_compat_user_sigmask((const compat_sigset_t __user *)sig, + &ksigmask, &sigsaved, sigsz); + else +#endif + ret = set_user_sigmask(sig, &ksigmask, + &sigsaved, sigsz); + if (ret) return ret; } -- cgit From 93958742192e7956d05989836ada9071f9ffe42e Mon Sep 17 00:00:00 2001 From: Jonathan Hunter Date: Mon, 25 Mar 2019 12:28:07 +0100 Subject: arm64: tegra: Disable CQE Support for SDMMC4 on Tegra186 Enabling CQE support on Tegra186 Jetson TX2 has introduced a regression that is causing accesses to the file-system on the eMMC to fail. Errors such as the following have been observed ... mmc2: running CQE recovery mmc2: mmc_select_hs400 failed, error -110 print_req_error: I/O error, dev mmcblk2, sector 8 flags 80700 mmc2: cqhci: CQE failed to exit halt state For now disable CQE support for Tegra186 until this issue is resolved. Fixes: dfd3cb6feb73 arm64: tegra: Add CQE Support for SDMMC4 Signed-off-by: Jonathan Hunter Signed-off-by: Thierry Reding Signed-off-by: Arnd Bergmann --- arch/arm64/boot/dts/nvidia/tegra186.dtsi | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/boot/dts/nvidia/tegra186.dtsi b/arch/arm64/boot/dts/nvidia/tegra186.dtsi index bb2045be8814..97aeb946ed5e 100644 --- a/arch/arm64/boot/dts/nvidia/tegra186.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra186.dtsi @@ -321,7 +321,6 @@ nvidia,default-trim = <0x9>; nvidia,dqs-trim = <63>; mmc-hs400-1_8v; - supports-cqe; status = "disabled"; }; -- cgit From 9dfec7ca0ba7ef987b3bbb71333d2f2182f5e030 Mon Sep 17 00:00:00 2001 From: Pierre-Yves MORDRET Date: Mon, 25 Mar 2019 17:21:55 +0100 Subject: dmaengine: stm32-mdma: Revert "dmaengine: stm32-mdma: Add a check on read_u32_array" This reverts commit 906b40b246b0 ("dmaengine: stm32-mdma: Add a check on read_u32_array") As stated by bindings "st,ahb-addr-masks" is optional. The statement inserted by this commit makes this property mandatory and prevents MDMA to be probed in case property not present. Signed-off-by: Pierre-Yves MORDRET Signed-off-by: Vinod Koul --- drivers/dma/stm32-mdma.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c index 4e0eede599a8..ac0301b69593 100644 --- a/drivers/dma/stm32-mdma.c +++ b/drivers/dma/stm32-mdma.c @@ -1578,11 +1578,9 @@ static int stm32_mdma_probe(struct platform_device *pdev) dmadev->nr_channels = nr_channels; dmadev->nr_requests = nr_requests; - ret = device_property_read_u32_array(&pdev->dev, "st,ahb-addr-masks", + device_property_read_u32_array(&pdev->dev, "st,ahb-addr-masks", dmadev->ahb_addr_masks, count); - if (ret) - return ret; dmadev->nr_ahb_addr_masks = count; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); -- cgit From e861857545567adec8da3bdff728efdf7db12285 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 25 Mar 2019 12:34:10 -0600 Subject: blk-mq: fix sbitmap ws_active for shared tags We now wrap sbitmap waitqueues in an active counter, so we can avoid iterating wakeups unless we have waiters there. This works as long as everyone that's manipulating the waitqueues use the proper helpers. For the tag wait case for shared tags, however, we add ourselves to the waitqueue without incrementing/decrementing the ->ws_active count. This means that wakeups can take a long time to happen. Fix this by manually doing the inc/dec as needed for the wait queue handling. Reported-by: Michael Leun Tested-by: Michael Leun Cc: stable@vger.kernel.org Reviewed-by: Omar Sandoval Fixes: 5d2ee7122c73 ("sbitmap: optimize wakeup check") Signed-off-by: Jens Axboe --- block/blk-mq.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 28080b0235f0..3ff3d7b49969 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1072,7 +1072,13 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, hctx = container_of(wait, struct blk_mq_hw_ctx, dispatch_wait); spin_lock(&hctx->dispatch_wait_lock); - list_del_init(&wait->entry); + if (!list_empty(&wait->entry)) { + struct sbitmap_queue *sbq; + + list_del_init(&wait->entry); + sbq = &hctx->tags->bitmap_tags; + atomic_dec(&sbq->ws_active); + } spin_unlock(&hctx->dispatch_wait_lock); blk_mq_run_hw_queue(hctx, true); @@ -1088,6 +1094,7 @@ static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, struct request *rq) { + struct sbitmap_queue *sbq = &hctx->tags->bitmap_tags; struct wait_queue_head *wq; wait_queue_entry_t *wait; bool ret; @@ -1110,7 +1117,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, if (!list_empty_careful(&wait->entry)) return false; - wq = &bt_wait_ptr(&hctx->tags->bitmap_tags, hctx)->wait; + wq = &bt_wait_ptr(sbq, hctx)->wait; spin_lock_irq(&wq->lock); spin_lock(&hctx->dispatch_wait_lock); @@ -1120,6 +1127,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, return false; } + atomic_inc(&sbq->ws_active); wait->flags &= ~WQ_FLAG_EXCLUSIVE; __add_wait_queue(wq, wait); @@ -1140,6 +1148,7 @@ static bool blk_mq_mark_tag_wait(struct blk_mq_hw_ctx *hctx, * someone else gets the wakeup. */ list_del_init(&wait->entry); + atomic_dec(&sbq->ws_active); spin_unlock(&hctx->dispatch_wait_lock); spin_unlock_irq(&wq->lock); -- cgit From e6d1fa584e0dd9bfebaf345e9feea588cf75ead2 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 22 Mar 2019 09:13:51 +0800 Subject: sbitmap: order READ/WRITE freed instance and setting clear bit Inside sbitmap_queue_clear(), once the clear bit is set, it will be visiable to allocation path immediately. Meantime READ/WRITE on old associated instance(such as request in case of blk-mq) may be out-of-order with the setting clear bit, so race with re-allocation may be triggered. Adds one memory barrier for ordering READ/WRITE of the freed associated instance with setting clear bit for avoiding race with re-allocation. The following kernel oops triggerd by block/006 on aarch64 may be fixed: [ 142.330954] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000330 [ 142.338794] Mem abort info: [ 142.341554] ESR = 0x96000005 [ 142.344632] Exception class = DABT (current EL), IL = 32 bits [ 142.350500] SET = 0, FnV = 0 [ 142.353544] EA = 0, S1PTW = 0 [ 142.356678] Data abort info: [ 142.359528] ISV = 0, ISS = 0x00000005 [ 142.363343] CM = 0, WnR = 0 [ 142.366305] user pgtable: 64k pages, 48-bit VAs, pgdp = 000000002a3c51c0 [ 142.372983] [0000000000000330] pgd=0000000000000000, pud=0000000000000000 [ 142.379777] Internal error: Oops: 96000005 [#1] SMP [ 142.384613] Modules linked in: null_blk ib_isert iscsi_target_mod ib_srpt target_core_mod ib_srp scsi_transport_srp vfat fat rpcrdma sunrpc rdma_ucm ib_iser rdma_cm iw_cm libiscsi ib_umad scsi_transport_iscsi ib_ipoib ib_cm mlx5_ib ib_uverbs ib_core sbsa_gwdt crct10dif_ce ghash_ce ipmi_ssif sha2_ce ipmi_devintf sha256_arm64 sg sha1_ce ipmi_msghandler ip_tables xfs libcrc32c mlx5_core sdhci_acpi mlxfw ahci_platform at803x sdhci libahci_platform qcom_emac mmc_core hdma hdma_mgmt i2c_dev [last unloaded: null_blk] [ 142.429753] CPU: 7 PID: 1983 Comm: fio Not tainted 5.0.0.cki #2 [ 142.449458] pstate: 00400005 (nzcv daif +PAN -UAO) [ 142.454239] pc : __blk_mq_free_request+0x4c/0xa8 [ 142.458830] lr : blk_mq_free_request+0xec/0x118 [ 142.463344] sp : ffff00003360f6a0 [ 142.466646] x29: ffff00003360f6a0 x28: ffff000010e70000 [ 142.471941] x27: ffff801729a50048 x26: 0000000000010000 [ 142.477232] x25: ffff00003360f954 x24: ffff7bdfff021440 [ 142.482529] x23: 0000000000000000 x22: 00000000ffffffff [ 142.487830] x21: ffff801729810000 x20: 0000000000000000 [ 142.493123] x19: ffff801729a50000 x18: 0000000000000000 [ 142.498413] x17: 0000000000000000 x16: 0000000000000001 [ 142.503709] x15: 00000000000000ff x14: ffff7fe000000000 [ 142.509003] x13: ffff8017dcde09a0 x12: 0000000000000000 [ 142.514308] x11: 0000000000000001 x10: 0000000000000008 [ 142.519597] x9 : ffff8017dcde09a0 x8 : 0000000000002000 [ 142.524889] x7 : ffff8017dcde0a00 x6 : 000000015388f9be [ 142.530187] x5 : 0000000000000001 x4 : 0000000000000000 [ 142.535478] x3 : 0000000000000000 x2 : 0000000000000000 [ 142.540777] x1 : 0000000000000001 x0 : ffff00001041b194 [ 142.546071] Process fio (pid: 1983, stack limit = 0x000000006460a0ea) [ 142.552500] Call trace: [ 142.554926] __blk_mq_free_request+0x4c/0xa8 [ 142.559181] blk_mq_free_request+0xec/0x118 [ 142.563352] blk_mq_end_request+0xfc/0x120 [ 142.567444] end_cmd+0x3c/0xa8 [null_blk] [ 142.571434] null_complete_rq+0x20/0x30 [null_blk] [ 142.576194] blk_mq_complete_request+0x108/0x148 [ 142.580797] null_handle_cmd+0x1d4/0x718 [null_blk] [ 142.585662] null_queue_rq+0x60/0xa8 [null_blk] [ 142.590171] blk_mq_try_issue_directly+0x148/0x280 [ 142.594949] blk_mq_try_issue_list_directly+0x9c/0x108 [ 142.600064] blk_mq_sched_insert_requests+0xb0/0xd0 [ 142.604926] blk_mq_flush_plug_list+0x16c/0x2a0 [ 142.609441] blk_flush_plug_list+0xec/0x118 [ 142.613608] blk_finish_plug+0x3c/0x4c [ 142.617348] blkdev_direct_IO+0x3b4/0x428 [ 142.621336] generic_file_read_iter+0x84/0x180 [ 142.625761] blkdev_read_iter+0x50/0x78 [ 142.629579] aio_read.isra.6+0xf8/0x190 [ 142.633409] __io_submit_one.isra.8+0x148/0x738 [ 142.637912] io_submit_one.isra.9+0x88/0xb8 [ 142.642078] __arm64_sys_io_submit+0xe0/0x238 [ 142.646428] el0_svc_handler+0xa0/0x128 [ 142.650238] el0_svc+0x8/0xc [ 142.653104] Code: b9402a63 f9000a7f 3100047f 540000a0 (f9419a81) [ 142.659202] ---[ end trace 467586bc175eb09d ]--- Fixes: ea86ea2cdced20057da ("sbitmap: ammortize cost of clearing bits") Reported-and-bisected_and_tested-by: Yi Zhang Cc: Yi Zhang Cc: "jianchao.wang" Reviewed-by: Omar Sandoval Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- lib/sbitmap.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 5b382c1244ed..155fe38756ec 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -591,6 +591,17 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_wake_up); void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, unsigned int cpu) { + /* + * Once the clear bit is set, the bit may be allocated out. + * + * Orders READ/WRITE on the asssociated instance(such as request + * of blk_mq) by this bit for avoiding race with re-allocation, + * and its pair is the memory barrier implied in __sbitmap_get_word. + * + * One invariant is that the clear bit has to be zero when the bit + * is in use. + */ + smp_mb__before_atomic(); sbitmap_deferred_clear_bit(&sbq->sb, nr); /* -- cgit From 9bf7933fc3f306bc4ce74ad734f690a71670178a Mon Sep 17 00:00:00 2001 From: Roman Penyaev Date: Mon, 25 Mar 2019 20:09:24 +0100 Subject: io_uring: offload write to async worker in case of -EAGAIN In case of direct write -EAGAIN will be returned if page cache was previously populated. To avoid immediate completion of a request with -EAGAIN error write has to be offloaded to the async worker, like io_read() does. Signed-off-by: Roman Penyaev Cc: Jens Axboe Cc: linux-block@vger.kernel.org Signed-off-by: Jens Axboe --- fs/io_uring.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 8f48d29abf76..bbdbd56cf2ac 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1022,6 +1022,8 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s, ret = rw_verify_area(WRITE, file, &kiocb->ki_pos, iov_count); if (!ret) { + ssize_t ret2; + /* * Open-code file_start_write here to grab freeze protection, * which will be released by another thread in @@ -1036,7 +1038,19 @@ static int io_write(struct io_kiocb *req, const struct sqe_submit *s, SB_FREEZE_WRITE); } kiocb->ki_flags |= IOCB_WRITE; - io_rw_done(kiocb, call_write_iter(file, kiocb, &iter)); + + ret2 = call_write_iter(file, kiocb, &iter); + if (!force_nonblock || ret2 != -EAGAIN) { + io_rw_done(kiocb, ret2); + } else { + /* + * If ->needs_lock is true, we're already in async + * context. + */ + if (!s->needs_lock) + io_async_list_note(WRITE, req, iov_count); + ret = -EAGAIN; + } } out_free: kfree(iovec); -- cgit From 3b9c2f2e0e99bb67c96abcb659b3465efe3bee1f Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Sun, 24 Mar 2019 18:53:49 +0000 Subject: staging: vt6655: Fix interrupt race condition on device start up. It appears on some slower systems that the driver can find its way out of the workqueue while the interrupt is disabled by continuous polling by it. Move MACvIntEnable to vnt_interrupt_work so that it is always enabled on all routes out of vnt_interrupt_process. Move MACvIntDisable so that the device doesn't keep polling the system while the workqueue is being processed. Signed-off-by: Malcolm Priestley CC: stable@vger.kernel.org # v4.2+ Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6655/device_main.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index b370985b58a1..83f1a1cf9182 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -1033,8 +1033,6 @@ static void vnt_interrupt_process(struct vnt_private *priv) return; } - MACvIntDisable(priv->PortOffset); - spin_lock_irqsave(&priv->lock, flags); /* Read low level stats */ @@ -1122,8 +1120,6 @@ static void vnt_interrupt_process(struct vnt_private *priv) } spin_unlock_irqrestore(&priv->lock, flags); - - MACvIntEnable(priv->PortOffset, IMR_MASK_VALUE); } static void vnt_interrupt_work(struct work_struct *work) @@ -1133,6 +1129,8 @@ static void vnt_interrupt_work(struct work_struct *work) if (priv->vif) vnt_interrupt_process(priv); + + MACvIntEnable(priv->PortOffset, IMR_MASK_VALUE); } static irqreturn_t vnt_interrupt(int irq, void *arg) @@ -1142,6 +1140,8 @@ static irqreturn_t vnt_interrupt(int irq, void *arg) if (priv->vif) schedule_work(&priv->interrupt_work); + MACvIntDisable(priv->PortOffset); + return IRQ_HANDLED; } -- cgit From b6391ac73400eff38377a4a7364bd3df5efb5178 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Mon, 25 Mar 2019 11:40:07 +0800 Subject: staging: erofs: fix error handling when failed to read compresssed data Complete read error handling paths for all three kinds of compressed pages: 1) For cache-managed pages, PG_uptodate will be checked since read_endio will unlock and SetPageUptodate for these pages; 2) For inplaced pages, read_endio cannot SetPageUptodate directly since it should be used to mark the final decompressed data, PG_error will be set with page locked for IO error instead; 3) For staging pages, PG_error is used, which is similar to what we do for inplaced pages. Fixes: 3883a79abd02 ("staging: erofs: introduce VLE decompression support") Cc: # 4.19+ Reviewed-by: Chao Yu Signed-off-by: Gao Xiang Signed-off-by: Greg Kroah-Hartman --- drivers/staging/erofs/unzip_vle.c | 41 ++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) diff --git a/drivers/staging/erofs/unzip_vle.c b/drivers/staging/erofs/unzip_vle.c index c7b3b21123c1..31eef8395774 100644 --- a/drivers/staging/erofs/unzip_vle.c +++ b/drivers/staging/erofs/unzip_vle.c @@ -972,6 +972,7 @@ repeat: overlapped = false; compressed_pages = grp->compressed_pages; + err = 0; for (i = 0; i < clusterpages; ++i) { unsigned int pagenr; @@ -981,26 +982,39 @@ repeat: DBG_BUGON(!page); DBG_BUGON(!page->mapping); - if (z_erofs_is_stagingpage(page)) - continue; + if (!z_erofs_is_stagingpage(page)) { #ifdef EROFS_FS_HAS_MANAGED_CACHE - if (page->mapping == MNGD_MAPPING(sbi)) { - DBG_BUGON(!PageUptodate(page)); - continue; - } + if (page->mapping == MNGD_MAPPING(sbi)) { + if (unlikely(!PageUptodate(page))) + err = -EIO; + continue; + } #endif - /* only non-head page could be reused as a compressed page */ - pagenr = z_erofs_onlinepage_index(page); + /* + * only if non-head page can be selected + * for inplace decompression + */ + pagenr = z_erofs_onlinepage_index(page); - DBG_BUGON(pagenr >= nr_pages); - DBG_BUGON(pages[pagenr]); - ++sparsemem_pages; - pages[pagenr] = page; + DBG_BUGON(pagenr >= nr_pages); + DBG_BUGON(pages[pagenr]); + ++sparsemem_pages; + pages[pagenr] = page; + + overlapped = true; + } - overlapped = true; + /* PG_error needs checking for inplaced and staging pages */ + if (unlikely(PageError(page))) { + DBG_BUGON(PageUptodate(page)); + err = -EIO; + } } + if (unlikely(err)) + goto out; + llen = (nr_pages << PAGE_SHIFT) - work->pageofs; if (z_erofs_vle_workgrp_fmt(grp) == Z_EROFS_VLE_WORKGRP_FMT_PLAIN) { @@ -1198,6 +1212,7 @@ repeat: if (page->mapping == mc) { WRITE_ONCE(grp->compressed_pages[nr], page); + ClearPageError(page); if (!PagePrivate(page)) { /* * impossible to be !PagePrivate(page) for -- cgit From 9b9c87cf51783cbe7140c51472762094033cfeab Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 25 Mar 2019 11:56:59 +0300 Subject: staging: vc04_services: Fix an error code in vchiq_probe() We need to set "err" on this error path. Fixes: 187ac53e590c ("staging: vchiq_arm: rework probe and init functions") Signed-off-by: Dan Carpenter Acked-by: Stefan Wahren Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index 804daf83be35..064d0db4c51e 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -3513,6 +3513,7 @@ static int vchiq_probe(struct platform_device *pdev) struct device_node *fw_node; const struct of_device_id *of_id; struct vchiq_drvdata *drvdata; + struct device *vchiq_dev; int err; of_id = of_match_node(vchiq_of_match, pdev->dev.of_node); @@ -3547,9 +3548,12 @@ static int vchiq_probe(struct platform_device *pdev) goto failed_platform_init; } - if (IS_ERR(device_create(vchiq_class, &pdev->dev, vchiq_devid, - NULL, "vchiq"))) + vchiq_dev = device_create(vchiq_class, &pdev->dev, vchiq_devid, NULL, + "vchiq"); + if (IS_ERR(vchiq_dev)) { + err = PTR_ERR(vchiq_dev); goto failed_device_create; + } vchiq_debugfs_init(); -- cgit From 9498da46d1cef51ae29f595a9621341acecfa9ab Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Mon, 25 Mar 2019 22:48:01 +0200 Subject: staging: octeon-ethernet: fix incorrect PHY mode When connecting PHY, we set the mode to PHY_INTERFACE_MODE_GMII which is not always correct. Specifically on boards where RGMII_RXID is needed networking now longer works with at803x after commit 6d4cd041f0af ("net: phy: at803x: disable delay only for RGMII mode"). Fix by passing the correct mode. Tested on EdgeRouter Lite (RGMII_RXID, at803x PHY) and D-Link DSR-500N (RGMII, broadcom PHY). Fixes: 6d4cd041f0af ("net: phy: at803x: disable delay only for RGMII mode") Signed-off-by: Aaro Koskinen Signed-off-by: Greg Kroah-Hartman --- drivers/staging/octeon/ethernet-mdio.c | 2 +- drivers/staging/octeon/ethernet.c | 40 ++++++++++++++++++++++++++++---- drivers/staging/octeon/octeon-ethernet.h | 4 +++- 3 files changed, 39 insertions(+), 7 deletions(-) diff --git a/drivers/staging/octeon/ethernet-mdio.c b/drivers/staging/octeon/ethernet-mdio.c index d6248eecf123..2aee64fdaec5 100644 --- a/drivers/staging/octeon/ethernet-mdio.c +++ b/drivers/staging/octeon/ethernet-mdio.c @@ -163,7 +163,7 @@ int cvm_oct_phy_setup_device(struct net_device *dev) goto no_phy; phydev = of_phy_connect(dev, phy_node, cvm_oct_adjust_link, 0, - PHY_INTERFACE_MODE_GMII); + priv->phy_mode); of_node_put(phy_node); if (!phydev) diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c index ce61c5670ef6..986db76705cc 100644 --- a/drivers/staging/octeon/ethernet.c +++ b/drivers/staging/octeon/ethernet.c @@ -653,14 +653,37 @@ static struct device_node *cvm_oct_node_for_port(struct device_node *pip, return np; } -static void cvm_set_rgmii_delay(struct device_node *np, int iface, int port) +static void cvm_set_rgmii_delay(struct octeon_ethernet *priv, int iface, + int port) { + struct device_node *np = priv->of_node; u32 delay_value; + bool rx_delay; + bool tx_delay; - if (!of_property_read_u32(np, "rx-delay", &delay_value)) + /* By default, both RX/TX delay is enabled in + * __cvmx_helper_rgmii_enable(). + */ + rx_delay = true; + tx_delay = true; + + if (!of_property_read_u32(np, "rx-delay", &delay_value)) { cvmx_write_csr(CVMX_ASXX_RX_CLK_SETX(port, iface), delay_value); - if (!of_property_read_u32(np, "tx-delay", &delay_value)) + rx_delay = delay_value > 0; + } + if (!of_property_read_u32(np, "tx-delay", &delay_value)) { cvmx_write_csr(CVMX_ASXX_TX_CLK_SETX(port, iface), delay_value); + tx_delay = delay_value > 0; + } + + if (!rx_delay && !tx_delay) + priv->phy_mode = PHY_INTERFACE_MODE_RGMII_ID; + else if (!rx_delay) + priv->phy_mode = PHY_INTERFACE_MODE_RGMII_RXID; + else if (!tx_delay) + priv->phy_mode = PHY_INTERFACE_MODE_RGMII_TXID; + else + priv->phy_mode = PHY_INTERFACE_MODE_RGMII; } static int cvm_oct_probe(struct platform_device *pdev) @@ -825,6 +848,7 @@ static int cvm_oct_probe(struct platform_device *pdev) priv->port = port; priv->queue = cvmx_pko_get_base_queue(priv->port); priv->fau = fau - cvmx_pko_get_num_queues(port) * 4; + priv->phy_mode = PHY_INTERFACE_MODE_NA; for (qos = 0; qos < 16; qos++) skb_queue_head_init(&priv->tx_free_list[qos]); for (qos = 0; qos < cvmx_pko_get_num_queues(port); @@ -856,6 +880,7 @@ static int cvm_oct_probe(struct platform_device *pdev) break; case CVMX_HELPER_INTERFACE_MODE_SGMII: + priv->phy_mode = PHY_INTERFACE_MODE_SGMII; dev->netdev_ops = &cvm_oct_sgmii_netdev_ops; strcpy(dev->name, "eth%d"); break; @@ -865,11 +890,16 @@ static int cvm_oct_probe(struct platform_device *pdev) strcpy(dev->name, "spi%d"); break; - case CVMX_HELPER_INTERFACE_MODE_RGMII: case CVMX_HELPER_INTERFACE_MODE_GMII: + priv->phy_mode = PHY_INTERFACE_MODE_GMII; + dev->netdev_ops = &cvm_oct_rgmii_netdev_ops; + strcpy(dev->name, "eth%d"); + break; + + case CVMX_HELPER_INTERFACE_MODE_RGMII: dev->netdev_ops = &cvm_oct_rgmii_netdev_ops; strcpy(dev->name, "eth%d"); - cvm_set_rgmii_delay(priv->of_node, interface, + cvm_set_rgmii_delay(priv, interface, port_index); break; } diff --git a/drivers/staging/octeon/octeon-ethernet.h b/drivers/staging/octeon/octeon-ethernet.h index 4a07e7f43d12..be570d33685a 100644 --- a/drivers/staging/octeon/octeon-ethernet.h +++ b/drivers/staging/octeon/octeon-ethernet.h @@ -12,7 +12,7 @@ #define OCTEON_ETHERNET_H #include - +#include #include /** @@ -33,6 +33,8 @@ struct octeon_ethernet { * cvmx_helper_interface_mode_t */ int imode; + /* PHY mode */ + phy_interface_t phy_mode; /* List of outstanding tx buffers per queue */ struct sk_buff_head tx_free_list[16]; unsigned int last_speed; -- cgit From 187df76325af5d9e12ae9daec1510307797e54f0 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 22 Mar 2019 22:14:19 +0100 Subject: libceph: fix breakage caused by multipage bvecs A bvec can now consist of multiple physically contiguous pages. This means that bvec_iter_advance() can move to a different page while staying in the same bvec (i.e. ->bi_bvec_done != 0). The messenger works in terms of segments which can now be defined as the smaller of a bvec and a page. The "more bytes to process in this segment" condition holds only if bvec_iter_advance() leaves us in the same bvec _and_ in the same page. On next bvec (possibly in the same page) and on next page (possibly in the same bvec) we may need to set ->last_piece. Signed-off-by: Ilya Dryomov --- net/ceph/messenger.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 7e71b0df1fbc..3083988ce729 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -840,6 +840,7 @@ static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor, size_t bytes) { struct ceph_bio_iter *it = &cursor->bio_iter; + struct page *page = bio_iter_page(it->bio, it->iter); BUG_ON(bytes > cursor->resid); BUG_ON(bytes > bio_iter_len(it->bio, it->iter)); @@ -851,7 +852,8 @@ static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor, return false; /* no more data */ } - if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done)) + if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done && + page == bio_iter_page(it->bio, it->iter))) return false; /* more bytes to process in this segment */ if (!it->iter.bi_size) { @@ -899,6 +901,7 @@ static bool ceph_msg_data_bvecs_advance(struct ceph_msg_data_cursor *cursor, size_t bytes) { struct bio_vec *bvecs = cursor->data->bvec_pos.bvecs; + struct page *page = bvec_iter_page(bvecs, cursor->bvec_iter); BUG_ON(bytes > cursor->resid); BUG_ON(bytes > bvec_iter_len(bvecs, cursor->bvec_iter)); @@ -910,7 +913,8 @@ static bool ceph_msg_data_bvecs_advance(struct ceph_msg_data_cursor *cursor, return false; /* no more data */ } - if (!bytes || cursor->bvec_iter.bi_bvec_done) + if (!bytes || (cursor->bvec_iter.bi_bvec_done && + page == bvec_iter_page(bvecs, cursor->bvec_iter))) return false; /* more bytes to process in this segment */ BUG_ON(cursor->last_piece); -- cgit From a3ac7917b73070010c05b4485b8582a6c9cd69b6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 25 Mar 2019 14:49:00 -0700 Subject: Revert "parport: daisy: use new parport device model" This reverts commit 1aec4211204d9463d1fd209eb50453de16254599. Steven Rostedt reports that it causes a hang at bootup and bisected it to this commit. The troigger is apparently a module alias for "parport_lowlevel" that points to "parport_pc", which causes a hang with modprobe -q -- parport_lowlevel blocking forever with a backtrace like this: wait_for_completion_killable+0x1c/0x28 call_usermodehelper_exec+0xa7/0x108 __request_module+0x351/0x3d8 get_lowlevel_driver+0x28/0x41 [parport] __parport_register_driver+0x39/0x1f4 [parport] daisy_drv_init+0x31/0x4f [parport] parport_bus_init+0x5d/0x7b [parport] parport_default_proc_register+0x26/0x1000 [parport] do_one_initcall+0xc2/0x1e0 do_init_module+0x50/0x1d4 load_module+0x1c2e/0x21b3 sys_init_module+0xef/0x117 Supid says: "Due to the new device model daisy driver will now try to find the parallel ports while trying to register its driver so that it can bind with them. Now, since daisy driver is loaded while parport bus is initialising the list of parport is still empty and it tries to load the lowlevel driver, which has an alias set to parport_pc, now causes a deadlock" But I don't think the daisy driver should be loaded by the parport initialization in the first place, so let's revert the whole change. If the daisy driver can just initialize separately on its own (like a driver should), instead of hooking into the parport init sequence directly, this issue probably would go away. Reported-and-bisected-by: Steven Rostedt (VMware) Reported-by: Michal Kubecek Acked-by: Greg Kroah-Hartman Cc: Sudip Mukherjee Signed-off-by: Linus Torvalds --- drivers/parport/daisy.c | 32 +------------------------------- drivers/parport/probe.c | 2 +- drivers/parport/share.c | 10 +--------- include/linux/parport.h | 13 ------------- 4 files changed, 3 insertions(+), 54 deletions(-) diff --git a/drivers/parport/daisy.c b/drivers/parport/daisy.c index 56dd83a45e55..5484a46dafda 100644 --- a/drivers/parport/daisy.c +++ b/drivers/parport/daisy.c @@ -213,12 +213,10 @@ void parport_daisy_fini(struct parport *port) struct pardevice *parport_open(int devnum, const char *name) { struct daisydev *p = topology; - struct pardev_cb par_cb; struct parport *port; struct pardevice *dev; int daisy; - memset(&par_cb, 0, sizeof(par_cb)); spin_lock(&topology_lock); while (p && p->devnum != devnum) p = p->next; @@ -232,7 +230,7 @@ struct pardevice *parport_open(int devnum, const char *name) port = parport_get_port(p->port); spin_unlock(&topology_lock); - dev = parport_register_dev_model(port, name, &par_cb, devnum); + dev = parport_register_device(port, name, NULL, NULL, NULL, 0, NULL); parport_put_port(port); if (!dev) return NULL; @@ -482,31 +480,3 @@ static int assign_addrs(struct parport *port) kfree(deviceid); return detected; } - -static int daisy_drv_probe(struct pardevice *par_dev) -{ - struct device_driver *drv = par_dev->dev.driver; - - if (strcmp(drv->name, "daisy_drv")) - return -ENODEV; - if (strcmp(par_dev->name, daisy_dev_name)) - return -ENODEV; - - return 0; -} - -static struct parport_driver daisy_driver = { - .name = "daisy_drv", - .probe = daisy_drv_probe, - .devmodel = true, -}; - -int daisy_drv_init(void) -{ - return parport_register_driver(&daisy_driver); -} - -void daisy_drv_exit(void) -{ - parport_unregister_driver(&daisy_driver); -} diff --git a/drivers/parport/probe.c b/drivers/parport/probe.c index e5e6a463a941..e035174ba205 100644 --- a/drivers/parport/probe.c +++ b/drivers/parport/probe.c @@ -257,7 +257,7 @@ static ssize_t parport_read_device_id (struct parport *port, char *buffer, ssize_t parport_device_id (int devnum, char *buffer, size_t count) { ssize_t retval = -ENXIO; - struct pardevice *dev = parport_open(devnum, daisy_dev_name); + struct pardevice *dev = parport_open (devnum, "Device ID probe"); if (!dev) return -ENXIO; diff --git a/drivers/parport/share.c b/drivers/parport/share.c index 0171b8dbcdcd..5dc53d420ca8 100644 --- a/drivers/parport/share.c +++ b/drivers/parport/share.c @@ -137,19 +137,11 @@ static struct bus_type parport_bus_type = { int parport_bus_init(void) { - int retval; - - retval = bus_register(&parport_bus_type); - if (retval) - return retval; - daisy_drv_init(); - - return 0; + return bus_register(&parport_bus_type); } void parport_bus_exit(void) { - daisy_drv_exit(); bus_unregister(&parport_bus_type); } diff --git a/include/linux/parport.h b/include/linux/parport.h index f41f1d041e2c..397607a0c0eb 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -460,7 +460,6 @@ extern size_t parport_ieee1284_epp_read_addr (struct parport *, void *, size_t, int); /* IEEE1284.3 functions */ -#define daisy_dev_name "Device ID probe" extern int parport_daisy_init (struct parport *port); extern void parport_daisy_fini (struct parport *port); extern struct pardevice *parport_open (int devnum, const char *name); @@ -469,18 +468,6 @@ extern ssize_t parport_device_id (int devnum, char *buffer, size_t len); extern void parport_daisy_deselect_all (struct parport *port); extern int parport_daisy_select (struct parport *port, int daisy, int mode); -#ifdef CONFIG_PARPORT_1284 -extern int daisy_drv_init(void); -extern void daisy_drv_exit(void); -#else -static inline int daisy_drv_init(void) -{ - return 0; -} - -static inline void daisy_drv_exit(void) {} -#endif - /* Lowlevel drivers _can_ call this support function to handle irqs. */ static inline void parport_generic_irq(struct parport *port) { -- cgit From edef1ef134180149694b86386277076f566d165c Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 25 Mar 2019 09:04:39 -0700 Subject: ACPI / CPPC: Fix guaranteed performance handling As per the ACPI specification, "Guaranteed Performance Register" is a "Buffer" field and it cannot be "Integer", so treat the "Integer" type for "Guaranteed Performance Register" field as invalid and ignore its value in that case. Also save one cpc_read() call when "Guaranteed Performance Register" is not present, which means a register defined as: "Register(SystemMemory, 0, 0, 0, 0)". Fixes: 29523f095397 ("ACPI / CPPC: Add support for guaranteed performance") Suggested-by: Rafael J. Wysocki Signed-off-by: Srinivas Pandruvada Cc: 4.20+ # 4.20+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/cppc_acpi.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c index 1b207fca1420..d4244e7d0e38 100644 --- a/drivers/acpi/cppc_acpi.c +++ b/drivers/acpi/cppc_acpi.c @@ -1150,8 +1150,13 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps) cpc_read(cpunum, nominal_reg, &nom); perf_caps->nominal_perf = nom; - cpc_read(cpunum, guaranteed_reg, &guaranteed); - perf_caps->guaranteed_perf = guaranteed; + if (guaranteed_reg->type != ACPI_TYPE_BUFFER || + IS_NULL_REG(&guaranteed_reg->cpc_entry.reg)) { + perf_caps->guaranteed_perf = 0; + } else { + cpc_read(cpunum, guaranteed_reg, &guaranteed); + perf_caps->guaranteed_perf = guaranteed; + } cpc_read(cpunum, lowest_non_linear_reg, &min_nonlinear); perf_caps->lowest_nonlinear_perf = min_nonlinear; -- cgit From 92a3e426ec06e72b1c363179c79d30712447ff76 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Mon, 25 Mar 2019 09:04:40 -0700 Subject: cpufreq: intel_pstate: Also use CPPC nominal_perf for base_frequency The ACPI specification states that if the "Guaranteed Performance Register" is not implemented, the OSPM assumes guaranteed performance to always be equal to nominal performance. So for invalid or unimplemented guaranteed performance register, use nominal performance as guaranteed performance. This change will fall back to nominal_perf when guranteed_perf is invalid. If nominal_perf is also invalid or not present, fall back to the existing implementation, which is to read from HWP Capabilities MSR. Fixes: 86d333a8cc7f ("cpufreq: intel_pstate: Add base_frequency attribute") Suggested-by: Rafael J. Wysocki Signed-off-by: Srinivas Pandruvada Cc: 4.20+ # 4.20+ Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/intel_pstate.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index e22f0dbaebb1..b599c7318aab 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -385,7 +385,10 @@ static int intel_pstate_get_cppc_guranteed(int cpu) if (ret) return ret; - return cppc_perf.guaranteed_perf; + if (cppc_perf.guaranteed_perf) + return cppc_perf.guaranteed_perf; + + return cppc_perf.nominal_perf; } #else /* CONFIG_ACPI_CPPC_LIB */ -- cgit From 3e82a7f9031f204ac0f8dea494ac3870ad111261 Mon Sep 17 00:00:00 2001 From: Alexandru Gagniuc Date: Fri, 22 Mar 2019 19:36:51 -0500 Subject: PCI/LINK: Supply IRQ handler so level-triggered IRQs are acked A threaded IRQ with a NULL handler does not work with level-triggered interrupts. request_threaded_irq() will return an error: genirq: Threaded irq requested with handler=NULL and !ONESHOT for irq 16 pcie_bw_notification: probe of 0000:00:1b.0:pcie010 failed with error -22 For level interrupts we need to silence the interrupt before exiting the IRQ handler, so just clear the PCI_EXP_LNKSTA_LBMS bit there. Fixes: e8303bb7a75c ("PCI/LINK: Report degraded links via link bandwidth notification") Reported-by: Linus Torvalds Reported-by: Borislav Petkov Signed-off-by: Alexandru Gagniuc Signed-off-by: Bjorn Helgaas --- drivers/pci/pcie/bw_notification.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/drivers/pci/pcie/bw_notification.c b/drivers/pci/pcie/bw_notification.c index d2eae3b7cc0f..c48746f1cf3c 100644 --- a/drivers/pci/pcie/bw_notification.c +++ b/drivers/pci/pcie/bw_notification.c @@ -44,11 +44,10 @@ static void pcie_disable_link_bandwidth_notification(struct pci_dev *dev) pcie_capability_write_word(dev, PCI_EXP_LNKCTL, lnk_ctl); } -static irqreturn_t pcie_bw_notification_handler(int irq, void *context) +static irqreturn_t pcie_bw_notification_irq(int irq, void *context) { struct pcie_device *srv = context; struct pci_dev *port = srv->port; - struct pci_dev *dev; u16 link_status, events; int ret; @@ -58,6 +57,17 @@ static irqreturn_t pcie_bw_notification_handler(int irq, void *context) if (ret != PCIBIOS_SUCCESSFUL || !events) return IRQ_NONE; + pcie_capability_write_word(port, PCI_EXP_LNKSTA, events); + pcie_update_link_speed(port->subordinate, link_status); + return IRQ_WAKE_THREAD; +} + +static irqreturn_t pcie_bw_notification_handler(int irq, void *context) +{ + struct pcie_device *srv = context; + struct pci_dev *port = srv->port; + struct pci_dev *dev; + /* * Print status from downstream devices, not this root port or * downstream switch port. @@ -67,8 +77,6 @@ static irqreturn_t pcie_bw_notification_handler(int irq, void *context) __pcie_print_link_status(dev, false); up_read(&pci_bus_sem); - pcie_update_link_speed(port->subordinate, link_status); - pcie_capability_write_word(port, PCI_EXP_LNKSTA, events); return IRQ_HANDLED; } @@ -80,7 +88,8 @@ static int pcie_bandwidth_notification_probe(struct pcie_device *srv) if (!pcie_link_bandwidth_notification_supported(srv->port)) return -ENODEV; - ret = request_threaded_irq(srv->irq, NULL, pcie_bw_notification_handler, + ret = request_threaded_irq(srv->irq, pcie_bw_notification_irq, + pcie_bw_notification_handler, IRQF_SHARED, "PCIe BW notif", srv); if (ret) return ret; -- cgit From 55397ce8df48bdabe56abdc684764529e1334766 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 20 Mar 2019 12:05:30 +0100 Subject: PCI/LINK: Clear bandwidth notification interrupt before enabling it When booting a MacBookPro9,1, duplicate link downtraining messages are logged for the devices directly attached to the two CPU-internal Root Ports of the Core i7 3615QM: Once on device enumeration and once on enablement of the bandwidth notification interrupt on the Root Ports. Duplicate messages do not occur with Root Ports on the PCH and Downstream Ports on the Thunderbolt controller: Only a single message is logged for these, namely on device enumeration. The reason for the duplicate messages is a stale interrupt in the Link Status register of the 3615QM's internal Root Ports. Avoid by clearing the interrupt before enabling it. An alternative approach would be to clear the interrupt already on device enumeration or to report link downtraining only if the speed has changed. That way, link downtraining occurring between device enumeration and enablement of the bandwidth notification interrupt could be caught. However clearing stale interrupts before enabling them is a standard operating procedure for any driver and keeping the two steps in one place makes the code easier to follow. Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Reviewed-by: Alexandru Gagniuc --- drivers/pci/pcie/bw_notification.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/pcie/bw_notification.c b/drivers/pci/pcie/bw_notification.c index c48746f1cf3c..3c0f368c879b 100644 --- a/drivers/pci/pcie/bw_notification.c +++ b/drivers/pci/pcie/bw_notification.c @@ -30,6 +30,8 @@ static void pcie_enable_link_bandwidth_notification(struct pci_dev *dev) { u16 lnk_ctl; + pcie_capability_write_word(dev, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS); + pcie_capability_read_word(dev, PCI_EXP_LNKCTL, &lnk_ctl); lnk_ctl |= PCI_EXP_LNKCTL_LBMIE; pcie_capability_write_word(dev, PCI_EXP_LNKCTL, lnk_ctl); -- cgit From 0fa635aec9abd718bd18c0bda2261351a0811efc Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Wed, 20 Mar 2019 12:05:30 +0100 Subject: PCI/LINK: Deduplicate bandwidth reports for multi-function devices If a multi-function device's bandwidth is already limited when it is enumerated, a message is logged only for function 0. By contrast, when downtraining occurs after enumeration, a message is logged for all functions. That's because the former uses pcie_report_downtraining(), whereas the latter uses __pcie_print_link_status() (which doesn't filter functions != 0). I am seeing this happen on a MacBookPro9,1 with a GPU (function 0) and an integrated HDA controller (function 1). Avoid this incongruence by calling pcie_report_downtraining() in both cases. Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Reviewed-by: Alexandru Gagniuc --- drivers/pci/pci.h | 1 + drivers/pci/pcie/bw_notification.c | 2 +- drivers/pci/probe.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 224d88634115..d994839a3e24 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -273,6 +273,7 @@ enum pcie_link_width pcie_get_width_cap(struct pci_dev *dev); u32 pcie_bandwidth_capable(struct pci_dev *dev, enum pci_bus_speed *speed, enum pcie_link_width *width); void __pcie_print_link_status(struct pci_dev *dev, bool verbose); +void pcie_report_downtraining(struct pci_dev *dev); /* Single Root I/O Virtualization */ struct pci_sriov { diff --git a/drivers/pci/pcie/bw_notification.c b/drivers/pci/pcie/bw_notification.c index 3c0f368c879b..4fa9e3523ee1 100644 --- a/drivers/pci/pcie/bw_notification.c +++ b/drivers/pci/pcie/bw_notification.c @@ -76,7 +76,7 @@ static irqreturn_t pcie_bw_notification_handler(int irq, void *context) */ down_read(&pci_bus_sem); list_for_each_entry(dev, &port->subordinate->devices, bus_list) - __pcie_print_link_status(dev, false); + pcie_report_downtraining(dev); up_read(&pci_bus_sem); return IRQ_HANDLED; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 2ec0df04e0dc..7e12d0163863 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -2388,7 +2388,7 @@ static struct pci_dev *pci_scan_device(struct pci_bus *bus, int devfn) return dev; } -static void pcie_report_downtraining(struct pci_dev *dev) +void pcie_report_downtraining(struct pci_dev *dev) { if (!pci_is_pcie(dev)) return; -- cgit From d29f5aa0bc0c321e1b9e4658a2a7e08e885da52a Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Fri, 22 Mar 2019 20:00:20 +0100 Subject: net: phy: don't clear BMCR in genphy_soft_reset So far we effectively clear the BMCR register. Some PHY's can deal with this (e.g. because they reset BMCR to a default as part of a soft-reset) whilst on others this causes issues because e.g. the autoneg bit is cleared. Marvell is an example, see also thread [0]. So let's be a little bit more gentle and leave all bits we're not interested in as-is. This change is needed for PHY drivers to properly deal with the original patch. [0] https://marc.info/?t=155264050700001&r=1&w=2 Fixes: 6e2d85ec0559 ("net: phy: Stop with excessive soft reset") Tested-by: Phil Reid Tested-by: liweihang Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 49fdd1ee798e..77068c545de0 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1831,7 +1831,7 @@ int genphy_soft_reset(struct phy_device *phydev) { int ret; - ret = phy_write(phydev, MII_BMCR, BMCR_RESET); + ret = phy_set_bits(phydev, MII_BMCR, BMCR_RESET); if (ret < 0) return ret; -- cgit From c2fe742ff6e77c5b4fe4ad273191ddf28fdea25e Mon Sep 17 00:00:00 2001 From: Sreekanth Reddy Date: Mon, 4 Mar 2019 07:26:35 -0500 Subject: scsi: mpt3sas: Fix kernel panic during expander reset During expander reset handling, the driver invokes kernel function scsi_host_find_tag() to obtain outstanding requests associated with the scsi host managed by the driver. Driver loops from tag value zero to hba queue depth to obtain the outstanding scmds. But when blk-mq is enabled, the block layer may return stale entry for one or more requests. This may lead to kernel panic if the returned value is inaccessible or the memory pointed by the returned value is reused. Reference of upstream discussion: https://patchwork.kernel.org/patch/10734933/ Instead of calling scsi_host_find_tag() API for each and every smid (smid is tag +1) from one to shost->can_queue, now driver will call this API (to obtain the outstanding scmd) only for those smid's which are outstanding at the driver level. Driver will determine whether this smid is outstanding at driver level by looking into it's corresponding MPI request frame, if its MPI request frame is empty, then it means that this smid is free and does not need to call scsi_host_find_tag() for it. By doing this, driver will invoke scsi_host_find_tag() for only those tags which are outstanding at the driver level. Driver will check whether particular MPI request frame is empty or not by looking into the "DevHandle" field. If this field is zero then it means that this MPI request is empty. For active MPI request DevHandle must be non-zero. Also driver will memset the MPI request frame once the corresponding scmd is processed (i.e. just before calling scmd->done function). Signed-off-by: Sreekanth Reddy Signed-off-by: Martin K. Petersen --- drivers/scsi/mpt3sas/mpt3sas_base.c | 6 ++++++ drivers/scsi/mpt3sas/mpt3sas_scsih.c | 12 ++++++++++++ 2 files changed, 18 insertions(+) diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index e57774472e75..1d8c584ec1e9 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -3281,12 +3281,18 @@ mpt3sas_base_free_smid(struct MPT3SAS_ADAPTER *ioc, u16 smid) if (smid < ioc->hi_priority_smid) { struct scsiio_tracker *st; + void *request; st = _get_st_from_smid(ioc, smid); if (!st) { _base_recovery_check(ioc); return; } + + /* Clear MPI request frame */ + request = mpt3sas_base_get_msg_frame(ioc, smid); + memset(request, 0, ioc->request_sz); + mpt3sas_base_clear_st(ioc, st); _base_recovery_check(ioc); return; diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c index 8bb5b8f9f4d2..1ccfbc7eebe0 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c +++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c @@ -1462,11 +1462,23 @@ mpt3sas_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc, u16 smid) { struct scsi_cmnd *scmd = NULL; struct scsiio_tracker *st; + Mpi25SCSIIORequest_t *mpi_request; if (smid > 0 && smid <= ioc->scsiio_depth - INTERNAL_SCSIIO_CMDS_COUNT) { u32 unique_tag = smid - 1; + mpi_request = mpt3sas_base_get_msg_frame(ioc, smid); + + /* + * If SCSI IO request is outstanding at driver level then + * DevHandle filed must be non-zero. If DevHandle is zero + * then it means that this smid is free at driver level, + * so return NULL. + */ + if (!mpi_request->DevHandle) + return scmd; + scmd = scsi_host_find_tag(ioc->shost, unique_tag); if (scmd) { st = scsi_cmd_priv(scmd); -- cgit From b6554cfe09e1f610aed7d57164ab7760be57acd9 Mon Sep 17 00:00:00 2001 From: Dave Carroll Date: Fri, 22 Mar 2019 12:16:03 -0600 Subject: scsi: aacraid: Insure we don't access PCIe space during AER/EEH There are a few windows during AER/EEH when we can access PCIe I/O mapped registers. This will harden the access to insure we do not allow PCIe access during errors Signed-off-by: Dave Carroll Reviewed-by: Sagar Biradar Signed-off-by: Martin K. Petersen --- drivers/scsi/aacraid/aacraid.h | 7 ++++++- drivers/scsi/aacraid/commsup.c | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h index 1df5171594b8..11fb68d7e60d 100644 --- a/drivers/scsi/aacraid/aacraid.h +++ b/drivers/scsi/aacraid/aacraid.h @@ -2640,9 +2640,14 @@ static inline unsigned int cap_to_cyls(sector_t capacity, unsigned divisor) return capacity; } +static inline int aac_pci_offline(struct aac_dev *dev) +{ + return pci_channel_offline(dev->pdev) || dev->handle_pci_error; +} + static inline int aac_adapter_check_health(struct aac_dev *dev) { - if (unlikely(pci_channel_offline(dev->pdev))) + if (unlikely(aac_pci_offline(dev))) return -1; return (dev)->a_ops.adapter_check_health(dev); diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index e67e032936ef..78430a7b294c 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -672,7 +672,7 @@ int aac_fib_send(u16 command, struct fib *fibptr, unsigned long size, return -ETIMEDOUT; } - if (unlikely(pci_channel_offline(dev->pdev))) + if (unlikely(aac_pci_offline(dev))) return -EFAULT; if ((blink = aac_adapter_check_health(dev)) > 0) { @@ -772,7 +772,7 @@ int aac_hba_send(u8 command, struct fib *fibptr, fib_callback callback, spin_unlock_irqrestore(&fibptr->event_lock, flags); - if (unlikely(pci_channel_offline(dev->pdev))) + if (unlikely(aac_pci_offline(dev))) return -EFAULT; fibptr->flags |= FIB_CONTEXT_FLAG_WAIT; -- cgit From fba1bdd2a9a93f3e2181ec1936a3c2f6b37e7ed6 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 14 Mar 2019 01:30:59 -0500 Subject: scsi: qla4xxx: fix a potential NULL pointer dereference In case iscsi_lookup_endpoint fails, the fix returns -EINVAL to avoid NULL pointer dereference. Signed-off-by: Kangjie Lu Acked-by: Manish Rangankar Reviewed-by: Mukesh Ojha Signed-off-by: Martin K. Petersen --- drivers/scsi/qla4xxx/ql4_os.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c index 16a18d5d856f..6e4f4931ae17 100644 --- a/drivers/scsi/qla4xxx/ql4_os.c +++ b/drivers/scsi/qla4xxx/ql4_os.c @@ -3203,6 +3203,8 @@ static int qla4xxx_conn_bind(struct iscsi_cls_session *cls_session, if (iscsi_conn_bind(cls_session, cls_conn, is_leading)) return -EINVAL; ep = iscsi_lookup_endpoint(transport_fd); + if (!ep) + return -EINVAL; conn = cls_conn->dd_data; qla_conn = conn->dd_data; qla_conn->qla_ep = ep->dd_data; -- cgit From d498bc0ce8acb4a1bb80d6089d1932d919dc2532 Mon Sep 17 00:00:00 2001 From: Vinod Koul Date: Tue, 26 Mar 2019 10:55:47 +0530 Subject: MAINTAINERS: Fix uniphier-mdmac.c file path Commit 32e74aabebc8 ("dmaengine: uniphier-mdmac: add UniPhier MIO DMAC driver") wrongly put filepath for uniphier-mdmac.c, fix it Fixes: 32e74aabebc8 ("dmaengine: uniphier-mdmac: add UniPhier MIO DMAC driver") Reported-by: Joe Perches Signed-off-by: Vinod Koul --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index e17ebf70b548..67e895feddc8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2356,7 +2356,7 @@ F: arch/arm/mm/cache-uniphier.c F: arch/arm64/boot/dts/socionext/uniphier* F: drivers/bus/uniphier-system-bus.c F: drivers/clk/uniphier/ -F: drivers/dmaengine/uniphier-mdmac.c +F: drivers/dma/uniphier-mdmac.c F: drivers/gpio/gpio-uniphier.c F: drivers/i2c/busses/i2c-uniphier* F: drivers/irqchip/irq-uniphier-aidet.c -- cgit From 1396929e8a903db80425343cacca766a18ad6409 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 22 Mar 2019 16:51:07 +0800 Subject: phy: sun4i-usb: Support set_mode to USB_HOST for non-OTG PHYs While only the first PHY supports mode switching, the remaining PHYs work in USB host mode. They should support set_mode with mode=USB_HOST instead of failing. This is especially needed now that the USB core does set_mode for all USB ports, which was added in commit b97a31348379 ("usb: core: comply to PHY framework"). Make set_mode with mode=USB_HOST a no-op instead of failing for the non-OTG USB PHYs. Fixes: 6ba43c291961 ("phy-sun4i-usb: Add support for phy_set_mode") Signed-off-by: Chen-Yu Tsai Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/phy/allwinner/phy-sun4i-usb.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/phy/allwinner/phy-sun4i-usb.c b/drivers/phy/allwinner/phy-sun4i-usb.c index 5163097b43df..4bbd9ede38c8 100644 --- a/drivers/phy/allwinner/phy-sun4i-usb.c +++ b/drivers/phy/allwinner/phy-sun4i-usb.c @@ -485,8 +485,11 @@ static int sun4i_usb_phy_set_mode(struct phy *_phy, struct sun4i_usb_phy_data *data = to_sun4i_usb_phy_data(phy); int new_mode; - if (phy->index != 0) + if (phy->index != 0) { + if (mode == PHY_MODE_USB_HOST) + return 0; return -EINVAL; + } switch (mode) { case PHY_MODE_USB_HOST: -- cgit From e671765e521c571afec3157a7e17502d54f6a43e Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 22 Mar 2019 16:51:08 +0800 Subject: usb: core: Try generic PHY_MODE_USB_HOST if usb_phy_roothub_set_mode fails Some PHYs do not support PHY_MODE_USB_HOST_SS, i.e. USB 3.0 or higher. Fall back and try the more generic PHY_MODE_USB_HOST if it fails. Fixes: b97a31348379 ("usb: core: comply to PHY framework") Signed-off-by: Chen-Yu Tsai Tested-by: Neil Armstrong Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 3189181bb628..975d7c1288e3 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -2741,6 +2741,9 @@ int usb_add_hcd(struct usb_hcd *hcd, retval = usb_phy_roothub_set_mode(hcd->phy_roothub, PHY_MODE_USB_HOST_SS); + if (retval) + retval = usb_phy_roothub_set_mode(hcd->phy_roothub, + PHY_MODE_USB_HOST); if (retval) goto err_usb_phy_roothub_power_on; -- cgit From 41f00e6e9e55546390031996b773e7f3c1d95928 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Wed, 20 Mar 2019 10:27:11 -0500 Subject: usb: usb251xb: fix to avoid potential NULL pointer dereference of_match_device in usb251xb_probe can fail and returns a NULL pointer. The patch avoids a potential NULL pointer dereference in this scenario. Signed-off-by: Aditya Pakki Reviewed-by: Richard Leitner Signed-off-by: Greg Kroah-Hartman --- drivers/usb/misc/usb251xb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/usb/misc/usb251xb.c b/drivers/usb/misc/usb251xb.c index 2c8e2cad7e10..04684849d683 100644 --- a/drivers/usb/misc/usb251xb.c +++ b/drivers/usb/misc/usb251xb.c @@ -612,7 +612,7 @@ static int usb251xb_probe(struct usb251xb *hub) dev); int err; - if (np) { + if (np && of_id) { err = usb251xb_get_ofdata(hub, (struct usb251xb_data *)of_id->data); if (err) { -- cgit From 3d54d10c6afed34fd45b852bf76f55e8da31d8ef Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Mar 2019 14:54:30 +0100 Subject: usb: mtu3: fix EXTCON dependency When EXTCON is a loadable module, mtu3 fails to link as built-in: drivers/usb/mtu3/mtu3_plat.o: In function `mtu3_probe': mtu3_plat.c:(.text+0x690): undefined reference to `extcon_get_edev_by_phandle' Add a Kconfig dependency to force mtu3 also to be a loadable module if extconn is, but still allow it to be built without extcon. Fixes: d0ed062a8b75 ("usb: mtu3: dual-role mode support") Signed-off-by: Arnd Bergmann Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/usb/mtu3/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/mtu3/Kconfig b/drivers/usb/mtu3/Kconfig index bcc23486c4ed..928c2cd6fc00 100644 --- a/drivers/usb/mtu3/Kconfig +++ b/drivers/usb/mtu3/Kconfig @@ -6,6 +6,7 @@ config USB_MTU3 tristate "MediaTek USB3 Dual Role controller" depends on USB || USB_GADGET depends on ARCH_MEDIATEK || COMPILE_TEST + depends on EXTCON || !EXTCON select USB_XHCI_MTK if USB_SUPPORT && USB_XHCI_HCD help Say Y or M here if your system runs on MediaTek SoCs with -- cgit From 4b9a3932e7ba929baa231231e61874c7a56f8959 Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Fri, 22 Mar 2019 22:49:44 +0200 Subject: drm/i915: Mark AML 0x87CA as ULX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If I'm reading the spec right AML 0x87CA is a Y SKU, so it should be marked as ULX in our old style terminology. Cc: stable@vger.kernel.org Cc: José Roberto de Souza Cc: Rodrigo Vivi Cc: Tvrtko Ursulin Fixes: c0c46ca461f1 ("drm/i915/aml: Add new Amber Lake PCI ID") Signed-off-by: Ville Syrjälä Link: https://patchwork.freedesktop.org/patch/msgid/20190322204944.23613-1-ville.syrjala@linux.intel.com Reviewed-by: José Roberto de Souza (cherry picked from commit 57b1c4460dc46a00f6ec439f3f11d670736b0209) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_drv.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9adc7bb9e69c..a67a63b5aa84 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -2346,7 +2346,8 @@ static inline unsigned int i915_sg_segment_size(void) INTEL_DEVID(dev_priv) == 0x5915 || \ INTEL_DEVID(dev_priv) == 0x591E) #define IS_AML_ULX(dev_priv) (INTEL_DEVID(dev_priv) == 0x591C || \ - INTEL_DEVID(dev_priv) == 0x87C0) + INTEL_DEVID(dev_priv) == 0x87C0 || \ + INTEL_DEVID(dev_priv) == 0x87CA) #define IS_SKL_GT2(dev_priv) (IS_SKYLAKE(dev_priv) && \ INTEL_INFO(dev_priv)->gt == 2) #define IS_SKL_GT3(dev_priv) (IS_SKYLAKE(dev_priv) && \ -- cgit From ff9d31d0d46672e201fc9ff59c42f1eef5f00c77 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Wed, 20 Mar 2019 12:53:33 -0500 Subject: tracing: Remove unnecessary var_ref destroy in track_data_destroy() Commit 656fe2ba85e8 (tracing: Use hist trigger's var_ref array to destroy var_refs) centralized the destruction of all the var_refs in one place so that other code didn't have to do it. The track_data_destroy() added later ignored that and also destroyed the track_data var_ref, causing a double-free error flagged by KASAN. ================================================================== BUG: KASAN: use-after-free in destroy_hist_field+0x30/0x70 Read of size 8 at addr ffff888086df2210 by task bash/1694 CPU: 6 PID: 1694 Comm: bash Not tainted 5.1.0-rc1-test+ #15 Hardware name: Hewlett-Packard HP Compaq Pro 6300 SFF/339A, BIOS K01 v03.03 07/14/2016 Call Trace: dump_stack+0x71/0xa0 ? destroy_hist_field+0x30/0x70 print_address_description.cold.3+0x9/0x1fb ? destroy_hist_field+0x30/0x70 ? destroy_hist_field+0x30/0x70 kasan_report.cold.4+0x1a/0x33 ? __kasan_slab_free+0x100/0x150 ? destroy_hist_field+0x30/0x70 destroy_hist_field+0x30/0x70 track_data_destroy+0x55/0xe0 destroy_hist_data+0x1f0/0x350 hist_unreg_all+0x203/0x220 event_trigger_open+0xbb/0x130 do_dentry_open+0x296/0x700 ? stacktrace_count_trigger+0x30/0x30 ? generic_permission+0x56/0x200 ? __x64_sys_fchdir+0xd0/0xd0 ? inode_permission+0x55/0x200 ? security_inode_permission+0x18/0x60 path_openat+0x633/0x22b0 ? path_lookupat.isra.50+0x420/0x420 ? __kasan_kmalloc.constprop.12+0xc1/0xd0 ? kmem_cache_alloc+0xe5/0x260 ? getname_flags+0x6c/0x2a0 ? do_sys_open+0x149/0x2b0 ? do_syscall_64+0x73/0x1b0 ? entry_SYSCALL_64_after_hwframe+0x44/0xa9 ? _raw_write_lock_bh+0xe0/0xe0 ? __kernel_text_address+0xe/0x30 ? unwind_get_return_address+0x2f/0x50 ? __list_add_valid+0x2d/0x70 ? deactivate_slab.isra.62+0x1f4/0x5a0 ? getname_flags+0x6c/0x2a0 ? set_track+0x76/0x120 do_filp_open+0x11a/0x1a0 ? may_open_dev+0x50/0x50 ? _raw_spin_lock+0x7a/0xd0 ? _raw_write_lock_bh+0xe0/0xe0 ? __alloc_fd+0x10f/0x200 do_sys_open+0x1db/0x2b0 ? filp_open+0x50/0x50 do_syscall_64+0x73/0x1b0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7fa7b24a4ca2 Code: 25 00 00 41 00 3d 00 00 41 00 74 4c 48 8d 05 85 7a 0d 00 8b 00 85 c0 75 6d 89 f2 b8 01 01 00 00 48 89 fe bf 9c ff ff ff 0f 05 <48> 3d 00 f0 ff ff 0f 87 a2 00 00 00 48 8b 4c 24 28 64 48 33 0c 25 RSP: 002b:00007fffbafb3af0 EFLAGS: 00000246 ORIG_RAX: 0000000000000101 RAX: ffffffffffffffda RBX: 000055d3648ade30 RCX: 00007fa7b24a4ca2 RDX: 0000000000000241 RSI: 000055d364a55240 RDI: 00000000ffffff9c RBP: 00007fffbafb3bf0 R08: 0000000000000020 R09: 0000000000000002 R10: 00000000000001b6 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000003 R14: 0000000000000001 R15: 000055d364a55240 ================================================================== So remove the track_data_destroy() destroy_hist_field() call for that var_ref. Link: http://lkml.kernel.org/r/1deffec420f6a16d11dd8647318d34a66d1989a9.camel@linux.intel.com Fixes: 466f4528fbc69 ("tracing: Generalize hist trigger onmax and save action") Reported-by: Steven Rostedt (VMware) Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_events_hist.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index ca46339f3009..795aa2038377 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -3713,7 +3713,6 @@ static void track_data_destroy(struct hist_trigger_data *hist_data, struct trace_event_file *file = hist_data->event_file; destroy_hist_field(data->track_data.track_var, 0); - destroy_hist_field(data->track_data.var_ref, 0); if (data->action == ACTION_SNAPSHOT) { struct track_data *track_data; -- cgit From 3dee10da2e9ff220e054a8f158cc296c797fbe81 Mon Sep 17 00:00:00 2001 From: Frank Rowand Date: Thu, 21 Mar 2019 23:58:20 -0700 Subject: tracing: initialize variable in create_dyn_event() Fix compile warning in create_dyn_event(): 'ret' may be used uninitialized in this function [-Wuninitialized]. Link: http://lkml.kernel.org/r/1553237900-8555-1-git-send-email-frowand.list@gmail.com Cc: Masami Hiramatsu Cc: Ingo Molnar Cc: Tom Zanussi Cc: Ravi Bangoria Cc: stable@vger.kernel.org Fixes: 5448d44c3855 ("tracing: Add unified dynamic event framework") Signed-off-by: Frank Rowand Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/trace_dynevent.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_dynevent.c b/kernel/trace/trace_dynevent.c index dd1f43588d70..fa100ed3b4de 100644 --- a/kernel/trace/trace_dynevent.c +++ b/kernel/trace/trace_dynevent.c @@ -74,7 +74,7 @@ int dyn_event_release(int argc, char **argv, struct dyn_event_operations *type) static int create_dyn_event(int argc, char **argv) { struct dyn_event_operations *ops; - int ret; + int ret = -ENODEV; if (argv[0][0] == '-' || argv[0][0] == '!') return dyn_event_release(argc, argv, NULL); -- cgit From 9efb85c5cfac7e1f0caae4471446d936ff2163fe Mon Sep 17 00:00:00 2001 From: Hariprasad Kelam Date: Sun, 24 Mar 2019 00:05:23 +0530 Subject: ftrace: Fix warning using plain integer as NULL & spelling corrections Changed 0 --> NULL to avoid sparse warning Corrected spelling mistakes reported by checkpatch.pl Sparse warning below: sudo make C=2 CF=-D__CHECK_ENDIAN__ M=kernel/trace CHECK kernel/trace/ftrace.c kernel/trace/ftrace.c:3007:24: warning: Using plain integer as NULL pointer kernel/trace/ftrace.c:4758:37: warning: Using plain integer as NULL pointer Link: http://lkml.kernel.org/r/20190323183523.GA2244@hari-Inspiron-1545 Signed-off-by: Hariprasad Kelam Signed-off-by: Steven Rostedt (VMware) --- kernel/trace/ftrace.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index fa79323331b2..26c8ca9bd06b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1992,7 +1992,7 @@ static void print_bug_type(void) * modifying the code. @failed should be one of either: * EFAULT - if the problem happens on reading the @ip address * EINVAL - if what is read at @ip is not what was expected - * EPERM - if the problem happens on writting to the @ip address + * EPERM - if the problem happens on writing to the @ip address */ void ftrace_bug(int failed, struct dyn_ftrace *rec) { @@ -2391,7 +2391,7 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable) return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr); } - return -1; /* unknow ftrace bug */ + return -1; /* unknown ftrace bug */ } void __weak ftrace_replace_code(int mod_flags) @@ -3004,7 +3004,7 @@ ftrace_allocate_pages(unsigned long num_to_init) int cnt; if (!num_to_init) - return 0; + return NULL; start_pg = pg = kzalloc(sizeof(*pg), GFP_KERNEL); if (!pg) @@ -4755,7 +4755,7 @@ static int ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove, int reset, int enable) { - return ftrace_set_hash(ops, 0, 0, ip, remove, reset, enable); + return ftrace_set_hash(ops, NULL, 0, ip, remove, reset, enable); } /** @@ -5463,7 +5463,7 @@ void ftrace_create_filter_files(struct ftrace_ops *ops, /* * The name "destroy_filter_files" is really a misnomer. Although - * in the future, it may actualy delete the files, but this is + * in the future, it may actually delete the files, but this is * really intended to make sure the ops passed in are disabled * and that when this function returns, the caller is free to * free the ops. @@ -5786,7 +5786,7 @@ void ftrace_module_enable(struct module *mod) /* * If the tracing is enabled, go ahead and enable the record. * - * The reason not to enable the record immediatelly is the + * The reason not to enable the record immediately is the * inherent check of ftrace_make_nop/ftrace_make_call for * correct previous instructions. Making first the NOP * conversion puts the module to the correct state, thus -- cgit From db779ef67ffeadbb44e9e818eb64dbe528e2f48f Mon Sep 17 00:00:00 2001 From: Bhupesh Sharma Date: Tue, 26 Mar 2019 12:20:28 +0530 Subject: proc/kcore: Remove unused kclist_add_remap() Commit bf904d2762ee ("x86/pti/64: Remove the SYSCALL64 entry trampoline") removed the sole usage of kclist_add_remap() but it did not remove the left-over definition from the include file. Fix the same. Signed-off-by: Bhupesh Sharma Signed-off-by: Borislav Petkov Cc: Adrian Hunter Cc: Andrew Morton Cc: Dave Anderson Cc: Dave Young Cc: "David S. Miller" Cc: Ingo Molnar Cc: James Morse Cc: Kairui Song Cc: kexec@lists.infradead.org Cc: linux-arm-kernel@lists.infradead.org Cc: linuxppc-dev@lists.ozlabs.org Cc: Michael Ellerman Cc: Omar Sandoval Cc: "Peter Zijlstra (Intel)" Cc: Rahul Lakkireddy Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/1553583028-17804-1-git-send-email-bhsharma@redhat.com --- include/linux/kcore.h | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/include/linux/kcore.h b/include/linux/kcore.h index 8c3f8c14eeaa..94b561df3877 100644 --- a/include/linux/kcore.h +++ b/include/linux/kcore.h @@ -38,22 +38,11 @@ struct vmcoredd_node { #ifdef CONFIG_PROC_KCORE void __init kclist_add(struct kcore_list *, void *, size_t, int type); -static inline -void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz) -{ - m->vaddr = (unsigned long)vaddr; - kclist_add(m, addr, sz, KCORE_REMAP); -} #else static inline void kclist_add(struct kcore_list *new, void *addr, size_t size, int type) { } - -static inline -void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz) -{ -} #endif #endif /* _LINUX_KCORE_H */ -- cgit From 2032a8a27b5cc0f578d37fa16fa2494b80a0d00a Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Mon, 25 Mar 2019 17:01:45 -0700 Subject: xfs: serialize unaligned dio writes against all other dio writes XFS applies more strict serialization constraints to unaligned direct writes to accommodate things like direct I/O layer zeroing, unwritten extent conversion, etc. Unaligned submissions acquire the exclusive iolock and wait for in-flight dio to complete to ensure multiple submissions do not race on the same block and cause data corruption. This generally works in the case of an aligned dio followed by an unaligned dio, but the serialization is lost if I/Os occur in the opposite order. If an unaligned write is submitted first and immediately followed by an overlapping, aligned write, the latter submits without the typical unaligned serialization barriers because there is no indication of an unaligned dio still in-flight. This can lead to unpredictable results. To provide proper unaligned dio serialization, require that such direct writes are always the only dio allowed in-flight at one time for a particular inode. We already acquire the exclusive iolock and drain pending dio before submitting the unaligned dio. Wait once more after the dio submission to hold the iolock across the I/O and prevent further submissions until the unaligned I/O completes. This is heavy handed, but consistent with the current pre-submission serialization for unaligned direct writes. Signed-off-by: Brian Foster Reviewed-by: Allison Henderson Reviewed-by: Dave Chinner Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong --- fs/xfs/xfs_file.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 1f2e2845eb76..a7ceae90110e 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -529,18 +529,17 @@ xfs_file_dio_aio_write( count = iov_iter_count(from); /* - * If we are doing unaligned IO, wait for all other IO to drain, - * otherwise demote the lock if we had to take the exclusive lock - * for other reasons in xfs_file_aio_write_checks. + * If we are doing unaligned IO, we can't allow any other overlapping IO + * in-flight at the same time or we risk data corruption. Wait for all + * other IO to drain before we submit. If the IO is aligned, demote the + * iolock if we had to take the exclusive lock in + * xfs_file_aio_write_checks() for other reasons. */ if (unaligned_io) { - /* If we are going to wait for other DIO to finish, bail */ - if (iocb->ki_flags & IOCB_NOWAIT) { - if (atomic_read(&inode->i_dio_count)) - return -EAGAIN; - } else { - inode_dio_wait(inode); - } + /* unaligned dio always waits, bail */ + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + inode_dio_wait(inode); } else if (iolock == XFS_IOLOCK_EXCL) { xfs_ilock_demote(ip, XFS_IOLOCK_EXCL); iolock = XFS_IOLOCK_SHARED; @@ -548,6 +547,14 @@ xfs_file_dio_aio_write( trace_xfs_file_direct_write(ip, count, iocb->ki_pos); ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io); + + /* + * If unaligned, this is the only IO in-flight. If it has not yet + * completed, wait on it before we release the iolock to prevent + * subsequent overlapping IO. + */ + if (ret == -EIOCBQUEUED && unaligned_io) + inode_dio_wait(inode); out: xfs_iunlock(ip, iolock); -- cgit From e2a829b3da01b9b32c4d0291d042b8a6e2a98ca3 Mon Sep 17 00:00:00 2001 From: Bernhard Rosenkraenzer Date: Tue, 5 Mar 2019 00:38:19 +0100 Subject: ALSA: hda/realtek - Fix speakers on Acer Predator Helios 500 Ryzen laptops On an Acer Predator Helios 500 (Ryzen version), the laptop's speakers don't work out of the box. The problem can be worked around with hdajackretask, remapping the "Black Headphone, Right side" pin (0x21) to the Internal speaker. This patch adds a quirk to change this mapping by default. [ corrected ALC299_FIXUP_PREDATOR_SPK definition and adapted for the latest tree by tiwai ] Signed-off-by: Bernhard Rosenkraenzer Signed-off-by: Takashi Iwai --- sound/pci/hda/patch_realtek.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 01c71467600b..a3fb3d4c5730 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -5689,6 +5689,7 @@ enum { ALC225_FIXUP_WYSE_DISABLE_MIC_VREF, ALC286_FIXUP_ACER_AIO_HEADSET_MIC, ALC256_FIXUP_ASUS_MIC_NO_PRESENCE, + ALC299_FIXUP_PREDATOR_SPK, }; static const struct hda_fixup alc269_fixups[] = { @@ -6706,6 +6707,13 @@ static const struct hda_fixup alc269_fixups[] = { .chained = true, .chain_id = ALC256_FIXUP_ASUS_HEADSET_MODE }, + [ALC299_FIXUP_PREDATOR_SPK] = { + .type = HDA_FIXUP_PINS, + .v.pins = (const struct hda_pintbl[]) { + { 0x21, 0x90170150 }, /* use as headset mic, without its own jack detect */ + { } + } + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -6724,6 +6732,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x106d, "Acer Cloudbook 14", ALC283_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0x1025, 0x1099, "Acer Aspire E5-523G", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x110e, "Acer Aspire ES1-432", ALC255_FIXUP_ACER_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0x1025, 0x1246, "Acer Predator Helios 500", ALC299_FIXUP_PREDATOR_SPK), SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC), @@ -7124,6 +7133,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = { {.id = ALC255_FIXUP_DELL_HEADSET_MIC, .name = "alc255-dell-headset"}, {.id = ALC295_FIXUP_HP_X360, .name = "alc295-hp-x360"}, {.id = ALC295_FIXUP_CHROME_BOOK, .name = "alc-sense-combo"}, + {.id = ALC299_FIXUP_PREDATOR_SPK, .name = "predator-spk"}, {} }; #define ALC225_STANDARD_PINS \ -- cgit From fb1eb41a3dd4cfff274c98f3c3324ab329641298 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Fri, 22 Mar 2019 01:05:00 +0100 Subject: dt-bindings: net: dsa: qca8k: fix example In the example, the phy at phy@0 is clashing with the switch0@0 at the same address. Usually, the switches are accessible through pseudo PHYs which in case of the qca8k are located at 0x10 - 0x18. Reviewed-by: Florian Fainelli Signed-off-by: Christian Lamparter Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/dsa/qca8k.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt index bbcb255c3150..5eda99e6c86e 100644 --- a/Documentation/devicetree/bindings/net/dsa/qca8k.txt +++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt @@ -55,12 +55,12 @@ Example: reg = <4>; }; - switch0@0 { + switch@10 { compatible = "qca,qca8337"; #address-cells = <1>; #size-cells = <0>; - reg = <0>; + reg = <0x10>; ports { #address-cells = <1>; -- cgit From 5e07321f3388e6f2b13c43ae9df3e09efa8418e0 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Fri, 22 Mar 2019 01:05:01 +0100 Subject: dt-bindings: net: dsa: qca8k: support internal mdio-bus This patch updates the qca8k's binding to document to the approach for using the internal mdio-bus of the supported qca8k switches. Reviewed-by: Florian Fainelli Signed-off-by: Christian Lamparter Signed-off-by: David S. Miller --- .../devicetree/bindings/net/dsa/qca8k.txt | 69 ++++++++++++++++++++-- 1 file changed, 64 insertions(+), 5 deletions(-) diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt index 5eda99e6c86e..93a7469e70d4 100644 --- a/Documentation/devicetree/bindings/net/dsa/qca8k.txt +++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt @@ -12,10 +12,15 @@ Required properties: Subnodes: The integrated switch subnode should be specified according to the binding -described in dsa/dsa.txt. As the QCA8K switches do not have a N:N mapping of -port and PHY id, each subnode describing a port needs to have a valid phandle -referencing the internal PHY connected to it. The CPU port of this switch is -always port 0. +described in dsa/dsa.txt. If the QCA8K switch is connect to a SoC's external +mdio-bus each subnode describing a port needs to have a valid phandle +referencing the internal PHY it is connected to. This is because there's no +N:N mapping of port and PHY id. + +Don't use mixed external and internal mdio-bus configurations, as this is +not supported by the hardware. + +The CPU port of this switch is always port 0. A CPU port node has the following optional node: @@ -31,8 +36,9 @@ For QCA8K the 'fixed-link' sub-node supports only the following properties: - 'full-duplex' (boolean, optional), to indicate that full duplex is used. When absent, half duplex is assumed. -Example: +Examples: +for the external mdio-bus configuration: &mdio0 { phy_port1: phy@0 { @@ -108,3 +114,56 @@ Example: }; }; }; + +for the internal master mdio-bus configuration: + + &mdio0 { + switch@10 { + compatible = "qca,qca8337"; + #address-cells = <1>; + #size-cells = <0>; + + reg = <0x10>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + label = "cpu"; + ethernet = <&gmac1>; + phy-mode = "rgmii"; + fixed-link { + speed = 1000; + full-duplex; + }; + }; + + port@1 { + reg = <1>; + label = "lan1"; + }; + + port@2 { + reg = <2>; + label = "lan2"; + }; + + port@3 { + reg = <3>; + label = "lan3"; + }; + + port@4 { + reg = <4>; + label = "lan4"; + }; + + port@5 { + reg = <5>; + label = "wan"; + }; + }; + }; + }; -- cgit From 1eec7151ae0e134bd42e3f128066b2ff8da21393 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Fri, 22 Mar 2019 01:05:02 +0100 Subject: net: dsa: qca8k: remove leftover phy accessors This belated patch implements Andrew Lunn's request of "remove the phy_read() and phy_write() functions." While seemingly harmless, this causes the switch's user port PHYs to get registered twice. This is because the DSA subsystem will create a slave mdio-bus not knowing that the qca8k_phy_(read|write) accessors operate on the external mdio-bus. So the same "bus" gets effectively duplicated. Cc: stable@vger.kernel.org Fixes: 6b93fb46480a ("net-next: dsa: add new driver for qca8xxx family") Signed-off-by: Christian Lamparter Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 576b37d12a63..14ad78225f07 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -624,22 +624,6 @@ qca8k_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phy) qca8k_port_set_status(priv, port, 1); } -static int -qca8k_phy_read(struct dsa_switch *ds, int phy, int regnum) -{ - struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; - - return mdiobus_read(priv->bus, phy, regnum); -} - -static int -qca8k_phy_write(struct dsa_switch *ds, int phy, int regnum, u16 val) -{ - struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; - - return mdiobus_write(priv->bus, phy, regnum, val); -} - static void qca8k_get_strings(struct dsa_switch *ds, int port, u32 stringset, uint8_t *data) { @@ -879,8 +863,6 @@ static const struct dsa_switch_ops qca8k_switch_ops = { .setup = qca8k_setup, .adjust_link = qca8k_adjust_link, .get_strings = qca8k_get_strings, - .phy_read = qca8k_phy_read, - .phy_write = qca8k_phy_write, .get_ethtool_stats = qca8k_get_ethtool_stats, .get_sset_count = qca8k_get_sset_count, .get_mac_eee = qca8k_get_mac_eee, -- cgit From db460c54b67fc2cbe6dcef88b7bf3cba8e07f80e Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Fri, 22 Mar 2019 01:05:03 +0100 Subject: net: dsa: qca8k: extend slave-bus implementations This patch implements accessors for the QCA8337 MDIO access through the MDIO_MASTER register, which makes it possible to access the PHYs on slave-bus through the switch. In cases where the switch ports are already mapped via external "phy-phandles", the internal mdio-bus is disabled in order to prevent a duplicated discovery and enumeration of the same PHYs. Don't use mixed external and internal mdio-bus configurations, as this is not supported by the hardware. Signed-off-by: Christian Lamparter Signed-off-by: David S. Miller --- drivers/net/dsa/qca8k.c | 156 +++++++++++++++++++++++++++++++++++++++++++++++- drivers/net/dsa/qca8k.h | 13 ++++ 2 files changed, 168 insertions(+), 1 deletion(-) diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c index 14ad78225f07..c4fa400efdcc 100644 --- a/drivers/net/dsa/qca8k.c +++ b/drivers/net/dsa/qca8k.c @@ -481,6 +481,155 @@ qca8k_port_set_status(struct qca8k_priv *priv, int port, int enable) qca8k_reg_clear(priv, QCA8K_REG_PORT_STATUS(port), mask); } +static u32 +qca8k_port_to_phy(int port) +{ + /* From Andrew Lunn: + * Port 0 has no internal phy. + * Port 1 has an internal PHY at MDIO address 0. + * Port 2 has an internal PHY at MDIO address 1. + * ... + * Port 5 has an internal PHY at MDIO address 4. + * Port 6 has no internal PHY. + */ + + return port - 1; +} + +static int +qca8k_mdio_write(struct qca8k_priv *priv, int port, u32 regnum, u16 data) +{ + u32 phy, val; + + if (regnum >= QCA8K_MDIO_MASTER_MAX_REG) + return -EINVAL; + + /* callee is responsible for not passing bad ports, + * but we still would like to make spills impossible. + */ + phy = qca8k_port_to_phy(port) % PHY_MAX_ADDR; + val = QCA8K_MDIO_MASTER_BUSY | QCA8K_MDIO_MASTER_EN | + QCA8K_MDIO_MASTER_WRITE | QCA8K_MDIO_MASTER_PHY_ADDR(phy) | + QCA8K_MDIO_MASTER_REG_ADDR(regnum) | + QCA8K_MDIO_MASTER_DATA(data); + + qca8k_write(priv, QCA8K_MDIO_MASTER_CTRL, val); + + return qca8k_busy_wait(priv, QCA8K_MDIO_MASTER_CTRL, + QCA8K_MDIO_MASTER_BUSY); +} + +static int +qca8k_mdio_read(struct qca8k_priv *priv, int port, u32 regnum) +{ + u32 phy, val; + + if (regnum >= QCA8K_MDIO_MASTER_MAX_REG) + return -EINVAL; + + /* callee is responsible for not passing bad ports, + * but we still would like to make spills impossible. + */ + phy = qca8k_port_to_phy(port) % PHY_MAX_ADDR; + val = QCA8K_MDIO_MASTER_BUSY | QCA8K_MDIO_MASTER_EN | + QCA8K_MDIO_MASTER_READ | QCA8K_MDIO_MASTER_PHY_ADDR(phy) | + QCA8K_MDIO_MASTER_REG_ADDR(regnum); + + qca8k_write(priv, QCA8K_MDIO_MASTER_CTRL, val); + + if (qca8k_busy_wait(priv, QCA8K_MDIO_MASTER_CTRL, + QCA8K_MDIO_MASTER_BUSY)) + return -ETIMEDOUT; + + val = (qca8k_read(priv, QCA8K_MDIO_MASTER_CTRL) & + QCA8K_MDIO_MASTER_DATA_MASK); + + return val; +} + +static int +qca8k_phy_write(struct dsa_switch *ds, int port, int regnum, u16 data) +{ + struct qca8k_priv *priv = ds->priv; + + return qca8k_mdio_write(priv, port, regnum, data); +} + +static int +qca8k_phy_read(struct dsa_switch *ds, int port, int regnum) +{ + struct qca8k_priv *priv = ds->priv; + int ret; + + ret = qca8k_mdio_read(priv, port, regnum); + + if (ret < 0) + return 0xffff; + + return ret; +} + +static int +qca8k_setup_mdio_bus(struct qca8k_priv *priv) +{ + u32 internal_mdio_mask = 0, external_mdio_mask = 0, reg; + struct device_node *ports, *port; + int err; + + ports = of_get_child_by_name(priv->dev->of_node, "ports"); + if (!ports) + return -EINVAL; + + for_each_available_child_of_node(ports, port) { + err = of_property_read_u32(port, "reg", ®); + if (err) + return err; + + if (!dsa_is_user_port(priv->ds, reg)) + continue; + + if (of_property_read_bool(port, "phy-handle")) + external_mdio_mask |= BIT(reg); + else + internal_mdio_mask |= BIT(reg); + } + + if (!external_mdio_mask && !internal_mdio_mask) { + dev_err(priv->dev, "no PHYs are defined.\n"); + return -EINVAL; + } + + /* The QCA8K_MDIO_MASTER_EN Bit, which grants access to PHYs through + * the MDIO_MASTER register also _disconnects_ the external MDC + * passthrough to the internal PHYs. It's not possible to use both + * configurations at the same time! + * + * Because this came up during the review process: + * If the external mdio-bus driver is capable magically disabling + * the QCA8K_MDIO_MASTER_EN and mutex/spin-locking out the qca8k's + * accessors for the time being, it would be possible to pull this + * off. + */ + if (!!external_mdio_mask && !!internal_mdio_mask) { + dev_err(priv->dev, "either internal or external mdio bus configuration is supported.\n"); + return -EINVAL; + } + + if (external_mdio_mask) { + /* Make sure to disable the internal mdio bus in cases + * a dt-overlay and driver reload changed the configuration + */ + + qca8k_reg_clear(priv, QCA8K_MDIO_MASTER_CTRL, + QCA8K_MDIO_MASTER_EN); + return 0; + } + + priv->ops.phy_read = qca8k_phy_read; + priv->ops.phy_write = qca8k_phy_write; + return 0; +} + static int qca8k_setup(struct dsa_switch *ds) { @@ -502,6 +651,10 @@ qca8k_setup(struct dsa_switch *ds) if (IS_ERR(priv->regmap)) pr_warn("regmap initialization failed"); + ret = qca8k_setup_mdio_bus(priv); + if (ret) + return ret; + /* Initialize CPU port pad mode (xMII type, delays...) */ phy_mode = of_get_phy_mode(ds->ports[QCA8K_CPU_PORT].dn); if (phy_mode < 0) { @@ -905,7 +1058,8 @@ qca8k_sw_probe(struct mdio_device *mdiodev) return -ENOMEM; priv->ds->priv = priv; - priv->ds->ops = &qca8k_switch_ops; + priv->ops = qca8k_switch_ops; + priv->ds->ops = &priv->ops; mutex_init(&priv->reg_mutex); dev_set_drvdata(&mdiodev->dev, priv); diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h index d146e54c8a6c..249fd62268e5 100644 --- a/drivers/net/dsa/qca8k.h +++ b/drivers/net/dsa/qca8k.h @@ -49,6 +49,18 @@ #define QCA8K_MIB_FLUSH BIT(24) #define QCA8K_MIB_CPU_KEEP BIT(20) #define QCA8K_MIB_BUSY BIT(17) +#define QCA8K_MDIO_MASTER_CTRL 0x3c +#define QCA8K_MDIO_MASTER_BUSY BIT(31) +#define QCA8K_MDIO_MASTER_EN BIT(30) +#define QCA8K_MDIO_MASTER_READ BIT(27) +#define QCA8K_MDIO_MASTER_WRITE 0 +#define QCA8K_MDIO_MASTER_SUP_PRE BIT(26) +#define QCA8K_MDIO_MASTER_PHY_ADDR(x) ((x) << 21) +#define QCA8K_MDIO_MASTER_REG_ADDR(x) ((x) << 16) +#define QCA8K_MDIO_MASTER_DATA(x) (x) +#define QCA8K_MDIO_MASTER_DATA_MASK GENMASK(15, 0) +#define QCA8K_MDIO_MASTER_MAX_PORTS 5 +#define QCA8K_MDIO_MASTER_MAX_REG 32 #define QCA8K_GOL_MAC_ADDR0 0x60 #define QCA8K_GOL_MAC_ADDR1 0x64 #define QCA8K_REG_PORT_STATUS(_i) (0x07c + (_i) * 4) @@ -169,6 +181,7 @@ struct qca8k_priv { struct dsa_switch *ds; struct mutex reg_mutex; struct device *dev; + struct dsa_switch_ops ops; }; struct qca8k_mib_desc { -- cgit From 1f8389bf63aec99218c62490869ca38d1a38ce46 Mon Sep 17 00:00:00 2001 From: Leslie Monis Date: Sat, 23 Mar 2019 19:11:33 +0530 Subject: net: sched: Kconfig: update reference link for PIE RFC 8033 replaces the IETF draft for PIE Signed-off-by: Leslie Monis Signed-off-by: David S. Miller --- net/sched/Kconfig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 1b9afdee5ba9..5c02ad97ef23 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -358,8 +358,7 @@ config NET_SCH_PIE help Say Y here if you want to use the Proportional Integral controller Enhanced scheduler packet scheduling algorithm. - For more information, please see - http://tools.ietf.org/html/draft-pan-tsvwg-pie-00 + For more information, please see https://tools.ietf.org/html/rfc8033 To compile this driver as a module, choose M here: the module will be called sch_pie. -- cgit From b7ebee2f95fb0fa2862d5ed2de707f872c311393 Mon Sep 17 00:00:00 2001 From: Dmitry Bezrukov Date: Sat, 23 Mar 2019 13:59:53 +0000 Subject: net: usb: aqc111: Extend HWID table by QNAP device New device of QNAP based on aqc111u Add this ID to blacklist of cdc_ether driver as well Signed-off-by: Dmitry Bezrukov Signed-off-by: David S. Miller --- drivers/net/usb/aqc111.c | 15 +++++++++++++++ drivers/net/usb/cdc_ether.c | 8 ++++++++ 2 files changed, 23 insertions(+) diff --git a/drivers/net/usb/aqc111.c b/drivers/net/usb/aqc111.c index 820a2fe7d027..aff995be2a31 100644 --- a/drivers/net/usb/aqc111.c +++ b/drivers/net/usb/aqc111.c @@ -1301,6 +1301,20 @@ static const struct driver_info trendnet_info = { .tx_fixup = aqc111_tx_fixup, }; +static const struct driver_info qnap_info = { + .description = "QNAP QNA-UC5G1T USB to 5GbE Adapter", + .bind = aqc111_bind, + .unbind = aqc111_unbind, + .status = aqc111_status, + .link_reset = aqc111_link_reset, + .reset = aqc111_reset, + .stop = aqc111_stop, + .flags = FLAG_ETHER | FLAG_FRAMING_AX | + FLAG_AVOID_UNLINK_URBS | FLAG_MULTI_PACKET, + .rx_fixup = aqc111_rx_fixup, + .tx_fixup = aqc111_tx_fixup, +}; + static int aqc111_suspend(struct usb_interface *intf, pm_message_t message) { struct usbnet *dev = usb_get_intfdata(intf); @@ -1455,6 +1469,7 @@ static const struct usb_device_id products[] = { {AQC111_USB_ETH_DEV(0x0b95, 0x2790, asix111_info)}, {AQC111_USB_ETH_DEV(0x0b95, 0x2791, asix112_info)}, {AQC111_USB_ETH_DEV(0x20f4, 0xe05a, trendnet_info)}, + {AQC111_USB_ETH_DEV(0x1c04, 0x0015, qnap_info)}, { },/* END */ }; MODULE_DEVICE_TABLE(usb, products); diff --git a/drivers/net/usb/cdc_ether.c b/drivers/net/usb/cdc_ether.c index 5512a1038721..3e9b2c319e45 100644 --- a/drivers/net/usb/cdc_ether.c +++ b/drivers/net/usb/cdc_ether.c @@ -851,6 +851,14 @@ static const struct usb_device_id products[] = { .driver_info = 0, }, +/* QNAP QNA-UC5G1T USB to 5GbE Adapter (based on AQC111U) */ +{ + USB_DEVICE_AND_INTERFACE_INFO(0x1c04, 0x0015, USB_CLASS_COMM, + USB_CDC_SUBCLASS_ETHERNET, + USB_CDC_PROTO_NONE), + .driver_info = 0, +}, + /* WHITELIST!!! * * CDC Ether uses two interfaces, not necessarily consecutive. -- cgit From 9926cb5f8b0f0aea535735185600d74db7608550 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 24 Mar 2019 00:48:22 +0800 Subject: tipc: change to check tipc_own_id to return in tipc_net_stop When running a syz script, a panic occurred: [ 156.088228] BUG: KASAN: use-after-free in tipc_disc_timeout+0x9c9/0xb20 [tipc] [ 156.094315] Call Trace: [ 156.094844] [ 156.095306] dump_stack+0x7c/0xc0 [ 156.097346] print_address_description+0x65/0x22e [ 156.100445] kasan_report.cold.3+0x37/0x7a [ 156.102402] tipc_disc_timeout+0x9c9/0xb20 [tipc] [ 156.106517] call_timer_fn+0x19a/0x610 [ 156.112749] run_timer_softirq+0xb51/0x1090 It was caused by the netns freed without deleting the discoverer timer, while later on the netns would be accessed in the timer handler. The timer should have been deleted by tipc_net_stop() when cleaning up a netns. However, tipc has been able to enable a bearer and start d->timer without the local node_addr set since Commit 52dfae5c85a4 ("tipc: obtain node identity from interface by default"), which caused the timer not to be deleted in tipc_net_stop() then. So fix it in tipc_net_stop() by changing to check local node_id instead of local node_addr, as Jon suggested. While at it, remove the calling of tipc_nametbl_withdraw() there, since tipc_nametbl_stop() will take of the nametbl's freeing after. Fixes: 52dfae5c85a4 ("tipc: obtain node identity from interface by default") Reported-by: syzbot+a25307ad099309f1c2b9@syzkaller.appspotmail.com Signed-off-by: Xin Long Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/net.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/net/tipc/net.c b/net/tipc/net.c index f076edb74338..7ce1e86b024f 100644 --- a/net/tipc/net.c +++ b/net/tipc/net.c @@ -163,12 +163,9 @@ void tipc_sched_net_finalize(struct net *net, u32 addr) void tipc_net_stop(struct net *net) { - u32 self = tipc_own_addr(net); - - if (!self) + if (!tipc_own_id(net)) return; - tipc_nametbl_withdraw(net, TIPC_CFG_SRV, self, self, self); rtnl_lock(); tipc_bearer_stop(net); tipc_node_stop(net); -- cgit From 450895d04ba13a96886eddfeddb11556ae8624f1 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Sun, 24 Mar 2019 00:18:46 +0200 Subject: net: phy: bcm54xx: Encode link speed and activity into LEDs Previously the green and amber LEDs on this quad PHY were solid, to indicate an encoding of the link speed (10/100/1000). This keeps the LEDs always on just as before, but now they flash on Rx/Tx activity. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 13 +++++++++++++ include/linux/brcmphy.h | 16 ++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 9605d4fe540b..cb86a3e90c7d 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -323,6 +323,19 @@ static int bcm54xx_config_init(struct phy_device *phydev) bcm54xx_phydsp_config(phydev); + /* Encode link speed into LED1 and LED3 pair (green/amber). + * Also flash these two LEDs on activity. This means configuring + * them for MULTICOLOR and encoding link/activity into them. + */ + val = BCM5482_SHD_LEDS1_LED1(BCM_LED_SRC_MULTICOLOR1) | + BCM5482_SHD_LEDS1_LED3(BCM_LED_SRC_MULTICOLOR1); + bcm_phy_write_shadow(phydev, BCM5482_SHD_LEDS1, val); + + val = BCM_LED_MULTICOLOR_IN_PHASE | + BCM5482_SHD_LEDS1_LED1(BCM_LED_MULTICOLOR_LINK_ACT) | + BCM5482_SHD_LEDS1_LED3(BCM_LED_MULTICOLOR_LINK_ACT); + bcm_phy_write_exp(phydev, BCM_EXP_MULTICOLOR, val); + return 0; } diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 9cd00a37b8d3..6db2d9a6e503 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -148,6 +148,22 @@ #define BCM_LED_SRC_OFF 0xe /* Tied high */ #define BCM_LED_SRC_ON 0xf /* Tied low */ +/* + * Broadcom Multicolor LED configurations (expansion register 4) + */ +#define BCM_EXP_MULTICOLOR (MII_BCM54XX_EXP_SEL_ER + 0x04) +#define BCM_LED_MULTICOLOR_IN_PHASE BIT(8) +#define BCM_LED_MULTICOLOR_LINK_ACT 0x0 +#define BCM_LED_MULTICOLOR_SPEED 0x1 +#define BCM_LED_MULTICOLOR_ACT_FLASH 0x2 +#define BCM_LED_MULTICOLOR_FDX 0x3 +#define BCM_LED_MULTICOLOR_OFF 0x4 +#define BCM_LED_MULTICOLOR_ON 0x5 +#define BCM_LED_MULTICOLOR_ALT 0x6 +#define BCM_LED_MULTICOLOR_FLASH 0x7 +#define BCM_LED_MULTICOLOR_LINK 0x8 +#define BCM_LED_MULTICOLOR_ACT 0x9 +#define BCM_LED_MULTICOLOR_PROGRAM 0xa /* * BCM5482: Shadow registers -- cgit From c493b09b2792336f471d2206be180a4b4c1fc9ba Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sun, 24 Mar 2019 00:21:03 +0100 Subject: net: devlink: skip info_get op call if it is not defined in dumpit In dumpit, unlike doit, the check for info_get op being defined is missing. Add it and avoid null pointer dereference in case driver does not define this op. Fixes: f9cf22882c60 ("devlink: add device information API") Reported-by: Ido Schimmel Signed-off-by: Jiri Pirko Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/core/devlink.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/core/devlink.c b/net/core/devlink.c index 78e22cea4cc7..da0a29f30885 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -3897,6 +3897,11 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, continue; } + if (!devlink->ops->info_get) { + idx++; + continue; + } + mutex_lock(&devlink->lock); err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET, NETLINK_CB(cb->skb).portid, -- cgit From 047a013f8d0af8299ce2d02af152de6a30165ccc Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 25 Mar 2019 13:49:16 +0100 Subject: chelsio: use BUG() instead of BUG_ON(1) clang warns about possible bugs in a dead code branch after BUG_ON(1) when CONFIG_PROFILE_ALL_BRANCHES is enabled: drivers/net/ethernet/chelsio/cxgb4/sge.c:479:3: error: variable 'buf_size' is used uninitialized whenever 'if' condition is false [-Werror,-Wsometimes-uninitialized] BUG_ON(1); ^~~~~~~~~ include/asm-generic/bug.h:61:36: note: expanded from macro 'BUG_ON' #define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0) ^~~~~~~~~~~~~~~~~~~ include/linux/compiler.h:48:23: note: expanded from macro 'unlikely' # define unlikely(x) (__branch_check__(x, 0, __builtin_constant_p(x))) ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ drivers/net/ethernet/chelsio/cxgb4/sge.c:482:9: note: uninitialized use occurs here return buf_size; ^~~~~~~~ drivers/net/ethernet/chelsio/cxgb4/sge.c:479:3: note: remove the 'if' if its condition is always true BUG_ON(1); ^ include/asm-generic/bug.h:61:32: note: expanded from macro 'BUG_ON' #define BUG_ON(condition) do { if (unlikely(condition)) BUG(); } while (0) ^ drivers/net/ethernet/chelsio/cxgb4/sge.c:459:14: note: initialize the variable 'buf_size' to silence this warning int buf_size; ^ = 0 Use BUG() here to create simpler code that clang understands correctly. Signed-off-by: Arnd Bergmann Reviewed-by: Nick Desaulniers Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 2 +- drivers/net/ethernet/chelsio/cxgb4/sge.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 3130b43bba52..02959035ed3f 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -2620,7 +2620,7 @@ static inline struct port_info *ethqset2pinfo(struct adapter *adap, int qset) } /* should never happen! */ - BUG_ON(1); + BUG(); return NULL; } diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 88773ca58e6b..b3da81e90132 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -476,7 +476,7 @@ static inline int get_buf_size(struct adapter *adapter, break; default: - BUG_ON(1); + BUG(); } return buf_size; -- cgit From 8c838f53e149871561a9261ac768a9c7071b43d0 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Mon, 25 Mar 2019 13:06:22 +0000 Subject: dpaa2-eth: fix race condition with bql frame accounting It might happen that Tx conf acknowledges a frame before it was subscribed in bql, as subscribing was previously done after the enqueue operation. This patch moves the netdev_tx_sent_queue call before the actual frame enqueue, so that this can never happen. Fixes: 569dac6a5a0d ("dpaa2-eth: bql support") Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 1a68052abb94..dc339dc1adb2 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -815,6 +815,14 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) */ queue_mapping = skb_get_queue_mapping(skb); fq = &priv->fq[queue_mapping]; + + fd_len = dpaa2_fd_get_len(&fd); + nq = netdev_get_tx_queue(net_dev, queue_mapping); + netdev_tx_sent_queue(nq, fd_len); + + /* Everything that happens after this enqueues might race with + * the Tx confirmation callback for this frame + */ for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) { err = priv->enqueue(priv, fq, &fd, 0); if (err != -EBUSY) @@ -825,13 +833,10 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev) percpu_stats->tx_errors++; /* Clean up everything, including freeing the skb */ free_tx_fd(priv, fq, &fd, false); + netdev_tx_completed_queue(nq, 1, fd_len); } else { - fd_len = dpaa2_fd_get_len(&fd); percpu_stats->tx_packets++; percpu_stats->tx_bytes += fd_len; - - nq = netdev_get_tx_queue(net_dev, queue_mapping); - netdev_tx_sent_queue(nq, fd_len); } return NETDEV_TX_OK; -- cgit From 4cb6560514fa19d556954b88128f3846fee66a03 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Thu, 28 Feb 2019 22:57:33 +0100 Subject: leds: trigger: netdev: fix refcnt leak on interface rename MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Renaming a netdev-trigger-tracked interface was resulting in an unbalanced dev_hold(). Example: > iw phy phy0 interface add foo type __ap > echo netdev > trigger > echo foo > device_name > ip link set foo name bar > iw dev bar del [ 237.355366] unregister_netdevice: waiting for bar to become free. Usage count = 1 [ 247.435362] unregister_netdevice: waiting for bar to become free. Usage count = 1 [ 257.545366] unregister_netdevice: waiting for bar to become free. Usage count = 1 Above problem was caused by trigger checking a dev->name which obviously changes after renaming an interface. It meant missing all further events including the NETDEV_UNREGISTER which is required for calling dev_put(). This change fixes that by: 1) Comparing device struct *address* for notification-filtering purposes 2) Dropping unneeded NETDEV_CHANGENAME code (no behavior change) Fixes: 06f502f57d0d ("leds: trigger: Introduce a NETDEV trigger") Signed-off-by: Rafał Miłecki Acked-by: Pavel Machek Signed-off-by: Jacek Anaszewski --- drivers/leds/trigger/ledtrig-netdev.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/leds/trigger/ledtrig-netdev.c b/drivers/leds/trigger/ledtrig-netdev.c index 3dd3ed46d473..167a94c02d05 100644 --- a/drivers/leds/trigger/ledtrig-netdev.c +++ b/drivers/leds/trigger/ledtrig-netdev.c @@ -301,11 +301,11 @@ static int netdev_trig_notify(struct notifier_block *nb, container_of(nb, struct led_netdev_data, notifier); if (evt != NETDEV_UP && evt != NETDEV_DOWN && evt != NETDEV_CHANGE - && evt != NETDEV_REGISTER && evt != NETDEV_UNREGISTER - && evt != NETDEV_CHANGENAME) + && evt != NETDEV_REGISTER && evt != NETDEV_UNREGISTER) return NOTIFY_DONE; - if (strcmp(dev->name, trigger_data->device_name)) + if (!(dev == trigger_data->net_dev || + (evt == NETDEV_REGISTER && !strcmp(dev->name, trigger_data->device_name)))) return NOTIFY_DONE; cancel_delayed_work_sync(&trigger_data->work); @@ -320,12 +320,9 @@ static int netdev_trig_notify(struct notifier_block *nb, dev_hold(dev); trigger_data->net_dev = dev; break; - case NETDEV_CHANGENAME: case NETDEV_UNREGISTER: - if (trigger_data->net_dev) { - dev_put(trigger_data->net_dev); - trigger_data->net_dev = NULL; - } + dev_put(trigger_data->net_dev); + trigger_data->net_dev = NULL; break; case NETDEV_UP: case NETDEV_CHANGE: -- cgit From 01f2f5b82a2b523ae76af53f2ff43c48dde10a00 Mon Sep 17 00:00:00 2001 From: Alakesh Haloi Date: Tue, 26 Mar 2019 02:00:01 +0000 Subject: SUNRPC: fix uninitialized variable warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid following compiler warning on uninitialized variable net/sunrpc/xprtsock.c: In function ‘xs_read_stream_request.constprop’: net/sunrpc/xprtsock.c:525:10: warning: ‘read’ may be used uninitialized in this function [-Wmaybe-uninitialized] return read; ^~~~ net/sunrpc/xprtsock.c:529:23: warning: ‘ret’ may be used uninitialized in this function [-Wmaybe-uninitialized] return ret < 0 ? ret : read; ~~~~~~~~~~~~~~^~~~~~ Signed-off-by: Alakesh Haloi Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9359539907ba..732d4b57411a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -495,8 +495,8 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg, int flags, struct rpc_rqst *req) { struct xdr_buf *buf = &req->rq_private_buf; - size_t want, read; - ssize_t ret; + size_t want, uninitialized_var(read); + ssize_t uninitialized_var(ret); xs_read_header(transport, buf); -- cgit From ce9afe08e71e3f7d64f337a6e932e50849230fc2 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Fri, 8 Mar 2019 21:03:24 +0530 Subject: powerpc/pseries/energy: Use OF accessor functions to read ibm,drc-indexes In cpu_to_drc_index() in the case when FW_FEATURE_DRC_INFO is absent, we currently use of_read_property() to obtain the pointer to the array corresponding to the property "ibm,drc-indexes". The elements of this array are of type __be32, but are accessed without any conversion to the OS-endianness, which is buggy on a Little Endian OS. Fix this by using of_property_read_u32_index() accessor function to safely read the elements of the array. Fixes: e83636ac3334 ("pseries/drc-info: Search DRC properties for CPU indexes") Cc: stable@vger.kernel.org # v4.16+ Reported-by: Pavithra R. Prakash Signed-off-by: Gautham R. Shenoy Reviewed-by: Vaidyanathan Srinivasan [mpe: Make the WARN_ON a WARN_ON_ONCE so it's not retriggerable] Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/pseries_energy.c | 27 ++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/platforms/pseries/pseries_energy.c b/arch/powerpc/platforms/pseries/pseries_energy.c index 6ed22127391b..921f12182f3e 100644 --- a/arch/powerpc/platforms/pseries/pseries_energy.c +++ b/arch/powerpc/platforms/pseries/pseries_energy.c @@ -77,18 +77,27 @@ static u32 cpu_to_drc_index(int cpu) ret = drc.drc_index_start + (thread_index * drc.sequential_inc); } else { - const __be32 *indexes; - - indexes = of_get_property(dn, "ibm,drc-indexes", NULL); - if (indexes == NULL) - goto err_of_node_put; + u32 nr_drc_indexes, thread_drc_index; /* - * The first element indexes[0] is the number of drc_indexes - * returned in the list. Hence thread_index+1 will get the - * drc_index corresponding to core number thread_index. + * The first element of ibm,drc-indexes array is the + * number of drc_indexes returned in the list. Hence + * thread_index+1 will get the drc_index corresponding + * to core number thread_index. */ - ret = indexes[thread_index + 1]; + rc = of_property_read_u32_index(dn, "ibm,drc-indexes", + 0, &nr_drc_indexes); + if (rc) + goto err_of_node_put; + + WARN_ON_ONCE(thread_index > nr_drc_indexes); + rc = of_property_read_u32_index(dn, "ibm,drc-indexes", + thread_index + 1, + &thread_drc_index); + if (rc) + goto err_of_node_put; + + ret = thread_drc_index; } rc = 0; -- cgit From 71cd6cb234876e2232635bb2fdcfde5146f7fffd Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 26 Mar 2019 08:08:43 +0300 Subject: drm/i915/selftests: Fix an IS_ERR() vs NULL check The live_context() function returns error pointers. It never returns NULL. Fixes: 9c1477e83e62 ("drm/i915/selftests: Exercise adding requests to a full GGTT") Signed-off-by: Dan Carpenter Reviewed-by: Mika Kuoppala Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190326050843.GA20038@kadam (cherry picked from commit 602cbe8efc523ba56e1f41e8f74c7aa835672593) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/selftests/i915_gem_evict.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 32dce7176f63..b9b0ea4e2404 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -455,7 +455,7 @@ static int igt_evict_contexts(void *arg) struct i915_gem_context *ctx; ctx = live_context(i915, file); - if (!ctx) + if (IS_ERR(ctx)) break; /* We will need some GGTT space for the rq's context */ -- cgit From fa59dd234c9a237e590a5f6db530d7f7ee88e5e8 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Tue, 26 Mar 2019 15:19:54 +1030 Subject: Revert "gpio: use new gpio_set_config() helper in more places" gpio-aspeed implements support for PIN_CONFIG_INPUT_DEBOUNCE. As of v5.1-rc1 we're seeing the following when booting a Romulus BMC kernel: > [ 21.373137] ------------[ cut here ]------------ > [ 21.374545] WARNING: CPU: 0 PID: 1 at drivers/gpio/gpio-aspeed.c:834 unregister_allocated_timer+0x38/0x94 > [ 21.376181] No timer allocated to offset 74 > [ 21.377672] CPU: 0 PID: 1 Comm: swapper Not tainted 5.1.0-rc1-dirty #6 > [ 21.378800] Hardware name: Generic DT based system > [ 21.379965] Backtrace: > [ 21.381024] [<80107d44>] (dump_backtrace) from [<80107f78>] (show_stack+0x20/0x24) > [ 21.382713] r7:8038b720 r6:00000009 r5:00000000 r4:87897c64 > [ 21.383815] [<80107f58>] (show_stack) from [<80656398>] (dump_stack+0x20/0x28) > [ 21.385042] [<80656378>] (dump_stack) from [<80115f1c>] (__warn.part.3+0xb4/0xdc) > [ 21.386253] [<80115e68>] (__warn.part.3) from [<80115fb0>] (warn_slowpath_fmt+0x6c/0x90) > [ 21.387471] r6:00000342 r5:807f8758 r4:80a07008 > [ 21.388278] [<80115f48>] (warn_slowpath_fmt) from [<8038b720>] (unregister_allocated_timer+0x38/0x94) > [ 21.389809] r3:0000004a r2:807f8774 > [ 21.390526] r7:00000000 r6:0000000a r5:60000153 r4:0000004a > [ 21.391601] [<8038b6e8>] (unregister_allocated_timer) from [<8038baac>] (aspeed_gpio_set_config+0x330/0x48c) > [ 21.393248] [<8038b77c>] (aspeed_gpio_set_config) from [<803840b0>] (gpiod_set_debounce+0xe8/0x114) > [ 21.394745] r10:82ee2248 r9:00000000 r8:87b63a00 r7:00001388 r6:87947320 r5:80729310 > [ 21.396030] r4:879f64a0 > [ 21.396499] [<80383fc8>] (gpiod_set_debounce) from [<804b4350>] (gpio_keys_probe+0x69c/0x8e0) > [ 21.397715] r7:845d94b8 r6:00000001 r5:00000000 r4:87b63a1c > [ 21.398618] [<804b3cb4>] (gpio_keys_probe) from [<8040eeec>] (platform_dev_probe+0x44/0x80) > [ 21.399834] r10:00000003 r9:80a3a8b0 r8:00000000 r7:00000000 r6:80a7f9dc r5:80a3a8b0 > [ 21.401163] r4:8796bc10 > [ 21.401634] [<8040eea8>] (platform_drv_probe) from [<8040d0d4>] (really_probe+0x208/0x3dc) > [ 21.402786] r5:80a7f8d0 r4:8796bc10 > [ 21.403547] [<8040cecc>] (really_probe) from [<8040d7a4>] (driver_probe_device+0x130/0x170) > [ 21.404744] r10:0000007b r9:8093683c r8:00000000 r7:80a07008 r6:80a3a8b0 r5:8796bc10 > [ 21.405854] r4:80a3a8b0 > [ 21.406324] [<8040d674>] (driver_probe_device) from [<8040da8c>] (device_driver_attach+0x68/0x70) > [ 21.407568] r9:8093683c r8:00000000 r7:80a07008 r6:80a3a8b0 r5:00000000 r4:8796bc10 > [ 21.408877] [<8040da24>] (device_driver_attach) from [<8040db14>] (__driver_attach+0x80/0x150) > [ 21.410327] r7:80a07008 r6:8796bc10 r5:00000001 r4:80a3a8b0 > [ 21.411294] [<8040da94>] (__driver_attach) from [<8040b20c>] (bus_for_each_dev+0x80/0xc4) > [ 21.412641] r7:80a07008 r6:8040da94 r5:80a3a8b0 r4:87966f30 > [ 21.413580] [<8040b18c>] (bus_for_each_dev) from [<8040dc0c>] (driver_attach+0x28/0x30) > [ 21.414943] r7:00000000 r6:87b411e0 r5:80a33fc8 r4:80a3a8b0 > [ 21.415927] [<8040dbe4>] (driver_attach) from [<8040bbf0>] (bus_add_driver+0x14c/0x200) > [ 21.417289] [<8040baa4>] (bus_add_driver) from [<8040e2b4>] (driver_register+0x84/0x118) > [ 21.418652] r7:80a60ae0 r6:809226b8 r5:80a07008 r4:80a3a8b0 > [ 21.419652] [<8040e230>] (driver_register) from [<8040fc28>] (__platform_driver_register+0x3c/0x50) > [ 21.421193] r5:80a07008 r4:809525f8 > [ 21.421990] [<8040fbec>] (__platform_driver_register) from [<809226d8>] (gpio_keys_init+0x20/0x28) > [ 21.423447] [<809226b8>] (gpio_keys_init) from [<8090128c>] (do_one_initcall+0x80/0x180) > [ 21.424886] [<8090120c>] (do_one_initcall) from [<80901538>] (kernel_init_freeable+0x1ac/0x26c) > [ 21.426354] r8:80a60ae0 r7:80a60ae0 r6:8093685c r5:00000008 r4:809525f8 > [ 21.427579] [<8090138c>] (kernel_init_freeable) from [<8066d9a0>] (kernel_init+0x18/0x11c) > [ 21.428819] r10:00000000 r9:00000000 r8:00000000 r7:00000000 r6:00000000 r5:8066d988 > [ 21.429947] r4:00000000 > [ 21.430415] [<8066d988>] (kernel_init) from [<801010e8>] (ret_from_fork+0x14/0x2c) > [ 21.431666] Exception stack(0x87897fb0 to 0x87897ff8) > [ 21.432877] 7fa0: 00000000 00000000 00000000 00000000 > [ 21.434446] 7fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 > [ 21.436052] 7fe0: 00000000 00000000 00000000 00000000 00000013 00000000 > [ 21.437308] r5:8066d988 r4:00000000 > [ 21.438102] ---[ end trace d7d7ac3a80567d0e ]--- We only hit unregister_allocated_timer() if the argument to aspeed_gpio_set_config() is 0, but we can't be calling through gpiod_set_debounce() from gpio_keys_probe() unless the gpio-keys button has a non-zero debounce interval. Commit 6581eaf0e890 ("gpio: use new gpio_set_config() helper in more places") spreads the use of gpio_set_config() to the debounce and transitory state configuration paths. The implementation of gpio_set_config() is: > static int gpio_set_config(struct gpio_chip *gc, unsigned offset, > enum pin_config_param mode) > { > unsigned long config = { PIN_CONF_PACKED(mode, 0) }; > > return gc->set_config ? gc->set_config(gc, offset, config) : -ENOTSUPP; > } Here it packs its own config value with a fixed argument of 0; this is incorrect behaviour for implementing the debounce and transitory functions, and the debounce and transitory gpio_set_config() call-sites now have an undetected type mismatch as they both already pack their own config parameter (i.e. what gets passed is not an `enum pin_config_param`). Indeed this can be seen in the small diff for 6581eaf0e890: > diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c > index de595fa31a1a..1f239aac43df 100644 > --- a/drivers/gpio/gpiolib.c > +++ b/drivers/gpio/gpiolib.c > @@ -2725,7 +2725,7 @@ int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce) > } > > config = pinconf_to_config_packed(PIN_CONFIG_INPUT_DEBOUNCE, debounce); > - return chip->set_config(chip, gpio_chip_hwgpio(desc), config); > + return gpio_set_config(chip, gpio_chip_hwgpio(desc), config); > } > EXPORT_SYMBOL_GPL(gpiod_set_debounce); > > @@ -2762,7 +2762,7 @@ int gpiod_set_transitory(struct gpio_desc *desc, bool transitory) > packed = pinconf_to_config_packed(PIN_CONFIG_PERSIST_STATE, > !transitory); > gpio = gpio_chip_hwgpio(desc); > - rc = chip->set_config(chip, gpio, packed); > + rc = gpio_set_config(chip, gpio, packed); > if (rc == -ENOTSUPP) { > dev_dbg(&desc->gdev->dev, "Persistence not supported for GPIO %d\n", > gpio); Revert commit 6581eaf0e890 ("gpio: use new gpio_set_config() helper in more places") to restore correct behaviour for gpiod_set_debounce() and gpiod_set_transitory(). Cc: Thomas Petazzoni Signed-off-by: Andrew Jeffery Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpiolib.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 144af0733581..0495bf1d480a 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -2776,7 +2776,7 @@ int gpiod_set_debounce(struct gpio_desc *desc, unsigned debounce) } config = pinconf_to_config_packed(PIN_CONFIG_INPUT_DEBOUNCE, debounce); - return gpio_set_config(chip, gpio_chip_hwgpio(desc), config); + return chip->set_config(chip, gpio_chip_hwgpio(desc), config); } EXPORT_SYMBOL_GPL(gpiod_set_debounce); @@ -2813,7 +2813,7 @@ int gpiod_set_transitory(struct gpio_desc *desc, bool transitory) packed = pinconf_to_config_packed(PIN_CONFIG_PERSIST_STATE, !transitory); gpio = gpio_chip_hwgpio(desc); - rc = gpio_set_config(chip, gpio, packed); + rc = chip->set_config(chip, gpio, packed); if (rc == -ENOTSUPP) { dev_dbg(&desc->gdev->dev, "Persistence not supported for GPIO %d\n", gpio); -- cgit From 2583303debb7acc77295b77901916d08a4c743c2 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Fri, 22 Mar 2019 18:27:12 +0100 Subject: gpio: mockup: fix debugfs read The debugfs read callback must advance ppos or users using read() on the file descriptor will never get the EOL. This wasn't spotted before as I was using busybox cat for testing which uses sendfile() internally and only noticed it now when switched to cat from coreutils. Fixes: 2a9e27408e12 ("gpio: mockup: rework debugfs interface") Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mockup.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c index 154d959e8993..74ba8b1d71d8 100644 --- a/drivers/gpio/gpio-mockup.c +++ b/drivers/gpio/gpio-mockup.c @@ -204,8 +204,9 @@ static ssize_t gpio_mockup_debugfs_read(struct file *file, struct gpio_mockup_chip *chip; struct seq_file *sfile; struct gpio_chip *gc; + int val, rv, cnt; char buf[3]; - int val, rv; + if (*ppos != 0) return 0; @@ -216,13 +217,14 @@ static ssize_t gpio_mockup_debugfs_read(struct file *file, gc = &chip->gc; val = gpio_mockup_get(gc, priv->offset); - snprintf(buf, sizeof(buf), "%d\n", val); + cnt = snprintf(buf, sizeof(buf), "%d\n", val); - rv = copy_to_user(usr_buf, buf, sizeof(buf)); + rv = copy_to_user(usr_buf, buf, cnt); if (rv) return rv; - return sizeof(buf) - 1; + *ppos += cnt; + return cnt; } static ssize_t gpio_mockup_debugfs_write(struct file *file, -- cgit From 0f02daed4e089c7a380a0ffdc9d93a5989043cf4 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Mon, 4 Mar 2019 13:55:46 +0800 Subject: x86/boot: Fix incorrect ifdeffery scope The declarations related to immovable memory handling are out of the BOOT_COMPRESSED_MISC_H #ifdef scope, wrap them inside. Signed-off-by: Baoquan He Signed-off-by: Borislav Petkov Cc: Chao Fan Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Juergen Gross Cc: "Kirill A. Shutemov" Cc: Thomas Gleixner Cc: Tom Lendacky Cc: x86-ml Link: https://lkml.kernel.org/r/20190304055546.18566-1-bhe@redhat.com --- arch/x86/boot/compressed/misc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index fd13655e0f9b..d2f184165934 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -120,8 +120,6 @@ static inline void console_init(void) void set_sev_encryption_mask(void); -#endif - /* acpi.c */ #ifdef CONFIG_ACPI acpi_physical_address get_rsdp_addr(void); @@ -135,3 +133,5 @@ int count_immovable_mem_regions(void); #else static inline int count_immovable_mem_regions(void) { return 0; } #endif + +#endif /* BOOT_COMPRESSED_MISC_H */ -- cgit From c4dcd89d20a8fe4009d25660c69396611328cc5e Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 22 Mar 2019 00:04:19 +0100 Subject: i2c: iop3xx: make bindings file name match the driver If we use the "i2c-" prefix for the binding documentation file name, then it should match the file name of the driver, if possible. It is possible for this driver, so rename it. Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/i2c-iop3xx.txt | 20 ++++++++++++++++++++ Documentation/devicetree/bindings/i2c/i2c-xscale.txt | 20 -------------------- 2 files changed, 20 insertions(+), 20 deletions(-) create mode 100644 Documentation/devicetree/bindings/i2c/i2c-iop3xx.txt delete mode 100644 Documentation/devicetree/bindings/i2c/i2c-xscale.txt diff --git a/Documentation/devicetree/bindings/i2c/i2c-iop3xx.txt b/Documentation/devicetree/bindings/i2c/i2c-iop3xx.txt new file mode 100644 index 000000000000..dcc8390e0d24 --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/i2c-iop3xx.txt @@ -0,0 +1,20 @@ +i2c Controller on XScale platforms such as IOP3xx and IXP4xx + +Required properties: +- compatible : Must be one of + "intel,iop3xx-i2c" + "intel,ixp4xx-i2c"; +- reg +- #address-cells = <1>; +- #size-cells = <0>; + +Optional properties: +- Child nodes conforming to i2c bus binding + +Example: + +i2c@c8011000 { + compatible = "intel,ixp4xx-i2c"; + reg = <0xc8011000 0x18>; + interrupts = <33 IRQ_TYPE_LEVEL_LOW>; +}; diff --git a/Documentation/devicetree/bindings/i2c/i2c-xscale.txt b/Documentation/devicetree/bindings/i2c/i2c-xscale.txt deleted file mode 100644 index dcc8390e0d24..000000000000 --- a/Documentation/devicetree/bindings/i2c/i2c-xscale.txt +++ /dev/null @@ -1,20 +0,0 @@ -i2c Controller on XScale platforms such as IOP3xx and IXP4xx - -Required properties: -- compatible : Must be one of - "intel,iop3xx-i2c" - "intel,ixp4xx-i2c"; -- reg -- #address-cells = <1>; -- #size-cells = <0>; - -Optional properties: -- Child nodes conforming to i2c bus binding - -Example: - -i2c@c8011000 { - compatible = "intel,ixp4xx-i2c"; - reg = <0xc8011000 0x18>; - interrupts = <33 IRQ_TYPE_LEVEL_LOW>; -}; -- cgit From 94c87527f4e1ebf85936f707ec84ff458f3bbb00 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 22 Mar 2019 00:04:20 +0100 Subject: i2c: mt65xx: make bindings file name match the driver If we use the "i2c-" prefix for the binding documentation file name, then it should match the file name of the driver, if possible. It is possible for this driver, so rename it. Signed-off-by: Wolfram Sang --- .../devicetree/bindings/i2c/i2c-mt65xx.txt | 44 ++++++++++++++++++++++ Documentation/devicetree/bindings/i2c/i2c-mtk.txt | 44 ---------------------- 2 files changed, 44 insertions(+), 44 deletions(-) create mode 100644 Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt delete mode 100644 Documentation/devicetree/bindings/i2c/i2c-mtk.txt diff --git a/Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt b/Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt new file mode 100644 index 000000000000..ee4c32454198 --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/i2c-mt65xx.txt @@ -0,0 +1,44 @@ +* MediaTek's I2C controller + +The MediaTek's I2C controller is used to interface with I2C devices. + +Required properties: + - compatible: value should be either of the following. + "mediatek,mt2701-i2c", "mediatek,mt6577-i2c": for MediaTek MT2701 + "mediatek,mt2712-i2c": for MediaTek MT2712 + "mediatek,mt6577-i2c": for MediaTek MT6577 + "mediatek,mt6589-i2c": for MediaTek MT6589 + "mediatek,mt7622-i2c": for MediaTek MT7622 + "mediatek,mt7623-i2c", "mediatek,mt6577-i2c": for MediaTek MT7623 + "mediatek,mt7629-i2c", "mediatek,mt2712-i2c": for MediaTek MT7629 + "mediatek,mt8173-i2c": for MediaTek MT8173 + - reg: physical base address of the controller and dma base, length of memory + mapped region. + - interrupts: interrupt number to the cpu. + - clock-div: the fixed value for frequency divider of clock source in i2c + module. Each IC may be different. + - clocks: clock name from clock manager + - clock-names: Must include "main" and "dma", if enable have-pmic need include + "pmic" extra. + +Optional properties: + - clock-frequency: Frequency in Hz of the bus when transfer, the default value + is 100000. + - mediatek,have-pmic: platform can control i2c form special pmic side. + Only mt6589 and mt8135 support this feature. + - mediatek,use-push-pull: IO config use push-pull mode. + +Example: + + i2c0: i2c@1100d000 { + compatible = "mediatek,mt6577-i2c"; + reg = <0x1100d000 0x70>, + <0x11000300 0x80>; + interrupts = ; + clock-frequency = <400000>; + mediatek,have-pmic; + clock-div = <16>; + clocks = <&i2c0_ck>, <&ap_dma_ck>; + clock-names = "main", "dma"; + }; + diff --git a/Documentation/devicetree/bindings/i2c/i2c-mtk.txt b/Documentation/devicetree/bindings/i2c/i2c-mtk.txt deleted file mode 100644 index ee4c32454198..000000000000 --- a/Documentation/devicetree/bindings/i2c/i2c-mtk.txt +++ /dev/null @@ -1,44 +0,0 @@ -* MediaTek's I2C controller - -The MediaTek's I2C controller is used to interface with I2C devices. - -Required properties: - - compatible: value should be either of the following. - "mediatek,mt2701-i2c", "mediatek,mt6577-i2c": for MediaTek MT2701 - "mediatek,mt2712-i2c": for MediaTek MT2712 - "mediatek,mt6577-i2c": for MediaTek MT6577 - "mediatek,mt6589-i2c": for MediaTek MT6589 - "mediatek,mt7622-i2c": for MediaTek MT7622 - "mediatek,mt7623-i2c", "mediatek,mt6577-i2c": for MediaTek MT7623 - "mediatek,mt7629-i2c", "mediatek,mt2712-i2c": for MediaTek MT7629 - "mediatek,mt8173-i2c": for MediaTek MT8173 - - reg: physical base address of the controller and dma base, length of memory - mapped region. - - interrupts: interrupt number to the cpu. - - clock-div: the fixed value for frequency divider of clock source in i2c - module. Each IC may be different. - - clocks: clock name from clock manager - - clock-names: Must include "main" and "dma", if enable have-pmic need include - "pmic" extra. - -Optional properties: - - clock-frequency: Frequency in Hz of the bus when transfer, the default value - is 100000. - - mediatek,have-pmic: platform can control i2c form special pmic side. - Only mt6589 and mt8135 support this feature. - - mediatek,use-push-pull: IO config use push-pull mode. - -Example: - - i2c0: i2c@1100d000 { - compatible = "mediatek,mt6577-i2c"; - reg = <0x1100d000 0x70>, - <0x11000300 0x80>; - interrupts = ; - clock-frequency = <400000>; - mediatek,have-pmic; - clock-div = <16>; - clocks = <&i2c0_ck>, <&ap_dma_ck>; - clock-names = "main", "dma"; - }; - -- cgit From 0a96f9ffbfe9446b5c4c67461085236d578248e5 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 22 Mar 2019 00:04:21 +0100 Subject: i2c: stu300: make bindings file name match the driver If we use the "i2c-" prefix for the binding documentation file name, then it should match the file name of the driver, if possible. It is possible for this driver, so rename it. Signed-off-by: Wolfram Sang --- Documentation/devicetree/bindings/i2c/i2c-st-ddci2c.txt | 15 --------------- Documentation/devicetree/bindings/i2c/i2c-stu300.txt | 15 +++++++++++++++ 2 files changed, 15 insertions(+), 15 deletions(-) delete mode 100644 Documentation/devicetree/bindings/i2c/i2c-st-ddci2c.txt create mode 100644 Documentation/devicetree/bindings/i2c/i2c-stu300.txt diff --git a/Documentation/devicetree/bindings/i2c/i2c-st-ddci2c.txt b/Documentation/devicetree/bindings/i2c/i2c-st-ddci2c.txt deleted file mode 100644 index bd81a482634f..000000000000 --- a/Documentation/devicetree/bindings/i2c/i2c-st-ddci2c.txt +++ /dev/null @@ -1,15 +0,0 @@ -ST Microelectronics DDC I2C - -Required properties : -- compatible : Must be "st,ddci2c" -- reg: physical base address of the controller and length of memory mapped - region. -- interrupts: interrupt number to the cpu. -- #address-cells = <1>; -- #size-cells = <0>; - -Optional properties: -- Child nodes conforming to i2c bus binding - -Examples : - diff --git a/Documentation/devicetree/bindings/i2c/i2c-stu300.txt b/Documentation/devicetree/bindings/i2c/i2c-stu300.txt new file mode 100644 index 000000000000..bd81a482634f --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/i2c-stu300.txt @@ -0,0 +1,15 @@ +ST Microelectronics DDC I2C + +Required properties : +- compatible : Must be "st,ddci2c" +- reg: physical base address of the controller and length of memory mapped + region. +- interrupts: interrupt number to the cpu. +- #address-cells = <1>; +- #size-cells = <0>; + +Optional properties: +- Child nodes conforming to i2c bus binding + +Examples : + -- cgit From 45dfceb0d14a06eeffe22581eb2996f4ed5225ca Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 22 Mar 2019 00:04:22 +0100 Subject: i2c: sun6i-p2wi: make bindings file name match the driver If we use the "i2c-" prefix for the binding documentation file name, then it should match the file name of the driver, if possible. It is possible for this driver, so rename it. Signed-off-by: Wolfram Sang Acked-by: Maxime Ripard --- .../devicetree/bindings/i2c/i2c-sun6i-p2wi.txt | 41 ++++++++++++++++++++++ .../devicetree/bindings/i2c/i2c-sunxi-p2wi.txt | 41 ---------------------- 2 files changed, 41 insertions(+), 41 deletions(-) create mode 100644 Documentation/devicetree/bindings/i2c/i2c-sun6i-p2wi.txt delete mode 100644 Documentation/devicetree/bindings/i2c/i2c-sunxi-p2wi.txt diff --git a/Documentation/devicetree/bindings/i2c/i2c-sun6i-p2wi.txt b/Documentation/devicetree/bindings/i2c/i2c-sun6i-p2wi.txt new file mode 100644 index 000000000000..49df0053347a --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/i2c-sun6i-p2wi.txt @@ -0,0 +1,41 @@ + +* Allwinner P2WI (Push/Pull 2 Wire Interface) controller + +Required properties : + + - reg : Offset and length of the register set for the device. + - compatible : Should one of the following: + - "allwinner,sun6i-a31-p2wi" + - interrupts : The interrupt line connected to the P2WI peripheral. + - clocks : The gate clk connected to the P2WI peripheral. + - resets : The reset line connected to the P2WI peripheral. + +Optional properties : + + - clock-frequency : Desired P2WI bus clock frequency in Hz. If not set the +default frequency is 100kHz + +A P2WI may contain one child node encoding a P2WI slave device. + +Slave device properties: + Required properties: + - reg : the I2C slave address used during the initialization + process to switch from I2C to P2WI mode + +Example: + + p2wi@1f03400 { + compatible = "allwinner,sun6i-a31-p2wi"; + reg = <0x01f03400 0x400>; + interrupts = <0 39 4>; + clocks = <&apb0_gates 3>; + clock-frequency = <6000000>; + resets = <&apb0_rst 3>; + + axp221: pmic@68 { + compatible = "x-powers,axp221"; + reg = <0x68>; + + /* ... */ + }; + }; diff --git a/Documentation/devicetree/bindings/i2c/i2c-sunxi-p2wi.txt b/Documentation/devicetree/bindings/i2c/i2c-sunxi-p2wi.txt deleted file mode 100644 index 49df0053347a..000000000000 --- a/Documentation/devicetree/bindings/i2c/i2c-sunxi-p2wi.txt +++ /dev/null @@ -1,41 +0,0 @@ - -* Allwinner P2WI (Push/Pull 2 Wire Interface) controller - -Required properties : - - - reg : Offset and length of the register set for the device. - - compatible : Should one of the following: - - "allwinner,sun6i-a31-p2wi" - - interrupts : The interrupt line connected to the P2WI peripheral. - - clocks : The gate clk connected to the P2WI peripheral. - - resets : The reset line connected to the P2WI peripheral. - -Optional properties : - - - clock-frequency : Desired P2WI bus clock frequency in Hz. If not set the -default frequency is 100kHz - -A P2WI may contain one child node encoding a P2WI slave device. - -Slave device properties: - Required properties: - - reg : the I2C slave address used during the initialization - process to switch from I2C to P2WI mode - -Example: - - p2wi@1f03400 { - compatible = "allwinner,sun6i-a31-p2wi"; - reg = <0x01f03400 0x400>; - interrupts = <0 39 4>; - clocks = <&apb0_gates 3>; - clock-frequency = <6000000>; - resets = <&apb0_rst 3>; - - axp221: pmic@68 { - compatible = "x-powers,axp221"; - reg = <0x68>; - - /* ... */ - }; - }; -- cgit From 080a910414659dfd18ffaf60a1e95b96c3f50eab Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 22 Mar 2019 00:04:23 +0100 Subject: i2c: wmt: make bindings file name match the driver If we use the "i2c-" prefix for the binding documentation file name, then it should match the file name of the driver, if possible. It is possible for this driver, so rename it. Signed-off-by: Wolfram Sang --- .../devicetree/bindings/i2c/i2c-vt8500.txt | 24 ---------------------- Documentation/devicetree/bindings/i2c/i2c-wmt.txt | 24 ++++++++++++++++++++++ 2 files changed, 24 insertions(+), 24 deletions(-) delete mode 100644 Documentation/devicetree/bindings/i2c/i2c-vt8500.txt create mode 100644 Documentation/devicetree/bindings/i2c/i2c-wmt.txt diff --git a/Documentation/devicetree/bindings/i2c/i2c-vt8500.txt b/Documentation/devicetree/bindings/i2c/i2c-vt8500.txt deleted file mode 100644 index 94a425eaa6c7..000000000000 --- a/Documentation/devicetree/bindings/i2c/i2c-vt8500.txt +++ /dev/null @@ -1,24 +0,0 @@ -* Wondermedia I2C Controller - -Required properties : - - - compatible : should be "wm,wm8505-i2c" - - reg : Offset and length of the register set for the device - - interrupts : where IRQ is the interrupt number - - clocks : phandle to the I2C clock source - -Optional properties : - - - clock-frequency : desired I2C bus clock frequency in Hz. - Valid values are 100000 and 400000. - Default to 100000 if not specified, or invalid value. - -Example : - - i2c_0: i2c@d8280000 { - compatible = "wm,wm8505-i2c"; - reg = <0xd8280000 0x1000>; - interrupts = <19>; - clocks = <&clki2c0>; - clock-frequency = <400000>; - }; diff --git a/Documentation/devicetree/bindings/i2c/i2c-wmt.txt b/Documentation/devicetree/bindings/i2c/i2c-wmt.txt new file mode 100644 index 000000000000..94a425eaa6c7 --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/i2c-wmt.txt @@ -0,0 +1,24 @@ +* Wondermedia I2C Controller + +Required properties : + + - compatible : should be "wm,wm8505-i2c" + - reg : Offset and length of the register set for the device + - interrupts : where IRQ is the interrupt number + - clocks : phandle to the I2C clock source + +Optional properties : + + - clock-frequency : desired I2C bus clock frequency in Hz. + Valid values are 100000 and 400000. + Default to 100000 if not specified, or invalid value. + +Example : + + i2c_0: i2c@d8280000 { + compatible = "wm,wm8505-i2c"; + reg = <0xd8280000 0x1000>; + interrupts = <19>; + clocks = <&clki2c0>; + clock-frequency = <400000>; + }; -- cgit From b929a500d68479163c48739d809cbf4c1335db6f Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Tue, 26 Mar 2019 21:30:46 +0100 Subject: x86/realmode: Don't leak the trampoline kernel address Since commit ad67b74d2469 ("printk: hash addresses printed with %p") at boot "____ptrval____" is printed instead of the trampoline addresses: Base memory trampoline at [(____ptrval____)] 99000 size 24576 Remove the print as we don't want to leak kernel addresses and this statement is not needed anymore. Fixes: ad67b74d2469d9b8 ("printk: hash addresses printed with %p") Signed-off-by: Matteo Croce Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20190326203046.20787-1-mcroce@redhat.com --- arch/x86/realmode/init.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index d10105825d57..47d097946872 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -20,8 +20,6 @@ void __init set_real_mode_mem(phys_addr_t mem, size_t size) void *base = __va(mem); real_mode_header = (struct real_mode_header *) base; - printk(KERN_DEBUG "Base memory trampoline at [%p] %llx size %zu\n", - base, (unsigned long long)mem, size); } void __init reserve_real_mode(void) -- cgit From 93e1c8a638308980309e009cc40b5a57ef87caf1 Mon Sep 17 00:00:00 2001 From: Romain Izard Date: Fri, 22 Mar 2019 16:53:02 +0100 Subject: usb: cdc-acm: fix race during wakeup blocking TX traffic When the kernel is compiled with preemption enabled, the URB completion handler can run in parallel with the work responsible for waking up the tty layer. If the URB handler sets the EVENT_TTY_WAKEUP bit during the call to tty_port_tty_wakeup() to signal that there is room for additional input, it will be cleared at the end of this call. As a result, TX traffic on the upper layer will be blocked. This can be seen with a kernel configured with CONFIG_PREEMPT, and a fast modem connected with PPP running over a USB CDC-ACM port. Use test_and_clear_bit() instead, which ensures that each wakeup requested by the URB completion code will trigger a call to tty_port_tty_wakeup(). Fixes: 1aba579f3cf5 cdc-acm: handle read pipe errors Signed-off-by: Romain Izard Cc: stable Acked-by: Oliver Neukum Signed-off-by: Greg Kroah-Hartman --- drivers/usb/class/cdc-acm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 739f8960811a..ec666eb4b7b4 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -558,10 +558,8 @@ static void acm_softint(struct work_struct *work) clear_bit(EVENT_RX_STALL, &acm->flags); } - if (test_bit(EVENT_TTY_WAKEUP, &acm->flags)) { + if (test_and_clear_bit(EVENT_TTY_WAKEUP, &acm->flags)) tty_port_tty_wakeup(&acm->port); - clear_bit(EVENT_TTY_WAKEUP, &acm->flags); - } } /* -- cgit From f276e002793cdb820862e8ea8f76769d56bba575 Mon Sep 17 00:00:00 2001 From: Mukesh Ojha Date: Tue, 26 Mar 2019 13:42:22 +0530 Subject: usb: u132-hcd: fix resource leak if platform_driver_register fails, cleanup the allocated resource gracefully. Signed-off-by: Mukesh Ojha Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/u132-hcd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/usb/host/u132-hcd.c b/drivers/usb/host/u132-hcd.c index 934584f0a20a..6343fbacd244 100644 --- a/drivers/usb/host/u132-hcd.c +++ b/drivers/usb/host/u132-hcd.c @@ -3204,6 +3204,9 @@ static int __init u132_hcd_init(void) printk(KERN_INFO "driver %s\n", hcd_name); workqueue = create_singlethread_workqueue("u132"); retval = platform_driver_register(&u132_platform_driver); + if (retval) + destroy_workqueue(workqueue); + return retval; } -- cgit From f3040983132bf3477acd45d2452a906e67c2fec9 Mon Sep 17 00:00:00 2001 From: Razvan Stefanescu Date: Tue, 19 Mar 2019 15:20:34 +0200 Subject: tty/serial: atmel: Add is_half_duplex helper Use a helper function to check that a port needs to use half duplex communication, replacing several occurrences of multi-line bit checking. Fixes: b389f173aaa1 ("tty/serial: atmel: RS485 half duplex w/DMA: enable RX after TX is done") Cc: stable Signed-off-by: Razvan Stefanescu Acked-by: Richard Genoud Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 41b728d223d1..55ca3416113f 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -231,6 +231,13 @@ static inline void atmel_uart_write_char(struct uart_port *port, u8 value) __raw_writeb(value, port->membase + ATMEL_US_THR); } +static inline int atmel_uart_is_half_duplex(struct uart_port *port) +{ + return ((port->rs485.flags & SER_RS485_ENABLED) && + !(port->rs485.flags & SER_RS485_RX_DURING_TX)) || + (port->iso7816.flags & SER_ISO7816_ENABLED); +} + #ifdef CONFIG_SERIAL_ATMEL_PDC static bool atmel_use_pdc_rx(struct uart_port *port) { @@ -608,10 +615,9 @@ static void atmel_stop_tx(struct uart_port *port) /* Disable interrupts */ atmel_uart_writel(port, ATMEL_US_IDR, atmel_port->tx_done_mask); - if (((port->rs485.flags & SER_RS485_ENABLED) && - !(port->rs485.flags & SER_RS485_RX_DURING_TX)) || - port->iso7816.flags & SER_ISO7816_ENABLED) + if (atmel_uart_is_half_duplex(port)) atmel_start_rx(port); + } /* @@ -628,9 +634,7 @@ static void atmel_start_tx(struct uart_port *port) return; if (atmel_use_pdc_tx(port) || atmel_use_dma_tx(port)) - if (((port->rs485.flags & SER_RS485_ENABLED) && - !(port->rs485.flags & SER_RS485_RX_DURING_TX)) || - port->iso7816.flags & SER_ISO7816_ENABLED) + if (atmel_uart_is_half_duplex(port)) atmel_stop_rx(port); if (atmel_use_pdc_tx(port)) @@ -928,9 +932,7 @@ static void atmel_complete_tx_dma(void *arg) */ if (!uart_circ_empty(xmit)) atmel_tasklet_schedule(atmel_port, &atmel_port->tasklet_tx); - else if (((port->rs485.flags & SER_RS485_ENABLED) && - !(port->rs485.flags & SER_RS485_RX_DURING_TX)) || - port->iso7816.flags & SER_ISO7816_ENABLED) { + else if (atmel_uart_is_half_duplex(port)) { /* DMA done, stop TX, start RX for RS485 */ atmel_start_rx(port); } @@ -1512,9 +1514,7 @@ static void atmel_tx_pdc(struct uart_port *port) atmel_uart_writel(port, ATMEL_US_IER, atmel_port->tx_done_mask); } else { - if (((port->rs485.flags & SER_RS485_ENABLED) && - !(port->rs485.flags & SER_RS485_RX_DURING_TX)) || - port->iso7816.flags & SER_ISO7816_ENABLED) { + if (atmel_uart_is_half_duplex(port)) { /* DMA done, stop TX, start RX for RS485 */ atmel_start_rx(port); } -- cgit From 69646d7a3689fbe1a65ae90397d22ac3f1b8d40f Mon Sep 17 00:00:00 2001 From: Razvan Stefanescu Date: Tue, 19 Mar 2019 15:20:35 +0200 Subject: tty/serial: atmel: RS485 HD w/DMA: enable RX after TX is stopped In half-duplex operation, RX should be started after TX completes. If DMA is used, there is a case when the DMA transfer completes but the TX FIFO is not emptied, so the RX cannot be restarted just yet. Use a boolean variable to store this state and rearm TX interrupt mask to be signaled again that the transfer finished. In interrupt transmit handler this variable is used to start RX. A warning message is generated if RX is activated before TX fifo is cleared. Fixes: b389f173aaa1 ("tty/serial: atmel: RS485 half duplex w/DMA: enable RX after TX is done") Signed-off-by: Razvan Stefanescu Acked-by: Richard Genoud Cc: stable Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/atmel_serial.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 55ca3416113f..0b4f36905321 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -166,6 +166,8 @@ struct atmel_uart_port { unsigned int pending_status; spinlock_t lock_suspended; + bool hd_start_rx; /* can start RX during half-duplex operation */ + /* ISO7816 */ unsigned int fidi_min; unsigned int fidi_max; @@ -933,8 +935,13 @@ static void atmel_complete_tx_dma(void *arg) if (!uart_circ_empty(xmit)) atmel_tasklet_schedule(atmel_port, &atmel_port->tasklet_tx); else if (atmel_uart_is_half_duplex(port)) { - /* DMA done, stop TX, start RX for RS485 */ - atmel_start_rx(port); + /* + * DMA done, re-enable TXEMPTY and signal that we can stop + * TX and start RX for RS485 + */ + atmel_port->hd_start_rx = true; + atmel_uart_writel(port, ATMEL_US_IER, + atmel_port->tx_done_mask); } spin_unlock_irqrestore(&port->lock, flags); @@ -1382,9 +1389,20 @@ atmel_handle_transmit(struct uart_port *port, unsigned int pending) struct atmel_uart_port *atmel_port = to_atmel_uart_port(port); if (pending & atmel_port->tx_done_mask) { - /* Either PDC or interrupt transmission */ atmel_uart_writel(port, ATMEL_US_IDR, atmel_port->tx_done_mask); + + /* Start RX if flag was set and FIFO is empty */ + if (atmel_port->hd_start_rx) { + if (!(atmel_uart_readl(port, ATMEL_US_CSR) + & ATMEL_US_TXEMPTY)) + dev_warn(port->dev, "Should start RX, but TX fifo is not empty\n"); + + atmel_port->hd_start_rx = false; + atmel_start_rx(port); + return; + } + atmel_tasklet_schedule(atmel_port, &atmel_port->tasklet_tx); } } -- cgit From 898a737c8a436b2fcd6dcb0b57775ada2f846a26 Mon Sep 17 00:00:00 2001 From: Erin Lo Date: Mon, 11 Mar 2019 16:54:31 +0800 Subject: dt-bindings: serial: Add compatible for Mediatek MT8183 This adds dt-binding documentation of uart for Mediatek MT8183 SoC Platform. Signed-off-by: Erin Lo Acked-by: Rob Herring Acked-by: Matthias Brugger Signed-off-by: Greg Kroah-Hartman --- Documentation/devicetree/bindings/serial/mtk-uart.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/serial/mtk-uart.txt b/Documentation/devicetree/bindings/serial/mtk-uart.txt index 742cb470595b..bcfb13194f16 100644 --- a/Documentation/devicetree/bindings/serial/mtk-uart.txt +++ b/Documentation/devicetree/bindings/serial/mtk-uart.txt @@ -16,6 +16,7 @@ Required properties: * "mediatek,mt8127-uart" for MT8127 compatible UARTS * "mediatek,mt8135-uart" for MT8135 compatible UARTS * "mediatek,mt8173-uart" for MT8173 compatible UARTS + * "mediatek,mt8183-uart", "mediatek,mt6577-uart" for MT8183 compatible UARTS * "mediatek,mt6577-uart" for MT6577 and all of the above - reg: The base address of the UART register bank. -- cgit From 3ec8002951ea173e24b466df1ea98c56b7920e63 Mon Sep 17 00:00:00 2001 From: Wentao Wang Date: Wed, 20 Mar 2019 15:30:39 +0000 Subject: Disable kgdboc failed by echo space to /sys/module/kgdboc/parameters/kgdboc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Echo "" to /sys/module/kgdboc/parameters/kgdboc will fail with "No such device” error. This is caused by function "configure_kgdboc" who init err to ENODEV when the config is empty (legal input) the code go out with ENODEV returned. Fixes: 2dd453168643 ("kgdboc: Fix restrict error") Signed-off-by: Wentao Wang Cc: stable Acked-by: Daniel Thompson Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/kgdboc.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/kgdboc.c b/drivers/tty/serial/kgdboc.c index 6fb312e7af71..bfe5e9e034ec 100644 --- a/drivers/tty/serial/kgdboc.c +++ b/drivers/tty/serial/kgdboc.c @@ -148,8 +148,10 @@ static int configure_kgdboc(void) char *cptr = config; struct console *cons; - if (!strlen(config) || isspace(config[0])) + if (!strlen(config) || isspace(config[0])) { + err = 0; goto noconfig; + } kgdboc_io_ops.is_console = 0; kgdb_tty_driver = NULL; -- cgit From f4e68d58cf2b20a581759bbc7228052534652673 Mon Sep 17 00:00:00 2001 From: Fabien Dessenne Date: Thu, 21 Mar 2019 16:43:26 +0100 Subject: tty: fix NULL pointer issue when tty_port ops is not set Unlike 'client_ops' which is initialized to 'default_client_ops', the port operations 'ops' may be left to NULL. Check the 'ops' value before checking the 'ops->x' value. Signed-off-by: Fabien Dessenne Signed-off-by: Greg Kroah-Hartman --- drivers/tty/tty_port.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index 044c3cbdcfa4..a9e12b3bc31d 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -325,7 +325,7 @@ static void tty_port_shutdown(struct tty_port *port, struct tty_struct *tty) if (tty && C_HUPCL(tty)) tty_port_lower_dtr_rts(port); - if (port->ops->shutdown) + if (port->ops && port->ops->shutdown) port->ops->shutdown(port); } out: @@ -398,7 +398,7 @@ EXPORT_SYMBOL_GPL(tty_port_tty_wakeup); */ int tty_port_carrier_raised(struct tty_port *port) { - if (port->ops->carrier_raised == NULL) + if (!port->ops || !port->ops->carrier_raised) return 1; return port->ops->carrier_raised(port); } @@ -414,7 +414,7 @@ EXPORT_SYMBOL(tty_port_carrier_raised); */ void tty_port_raise_dtr_rts(struct tty_port *port) { - if (port->ops->dtr_rts) + if (port->ops && port->ops->dtr_rts) port->ops->dtr_rts(port, 1); } EXPORT_SYMBOL(tty_port_raise_dtr_rts); @@ -429,7 +429,7 @@ EXPORT_SYMBOL(tty_port_raise_dtr_rts); */ void tty_port_lower_dtr_rts(struct tty_port *port) { - if (port->ops->dtr_rts) + if (port->ops && port->ops->dtr_rts) port->ops->dtr_rts(port, 0); } EXPORT_SYMBOL(tty_port_lower_dtr_rts); @@ -684,7 +684,7 @@ int tty_port_open(struct tty_port *port, struct tty_struct *tty, if (!tty_port_initialized(port)) { clear_bit(TTY_IO_ERROR, &tty->flags); - if (port->ops->activate) { + if (port->ops && port->ops->activate) { int retval = port->ops->activate(port, tty); if (retval) { mutex_unlock(&port->mutex); -- cgit From 0532a1b0d045115521a93acf28f1270df89ad806 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 22 Mar 2019 09:19:34 +0100 Subject: virt: vbox: Implement passing requestor info to the host for VirtualBox 6.0.x VirtualBox 6.0.x has a new feature where the guest kernel driver passes info about the origin of the request (e.g. userspace or kernelspace) to the hypervisor. If we do not pass this information then when running the 6.0.x userspace guest-additions tools on a 6.0.x host, some requests will get denied with a VERR_VERSION_MISMATCH error, breaking vboxservice.service and the mounting of shared folders marked to be auto-mounted. This commit implements passing the requestor info to the host, fixing this. Signed-off-by: Hans de Goede Signed-off-by: Greg Kroah-Hartman --- drivers/virt/vboxguest/vboxguest_core.c | 106 ++++++++++++++++++++--------- drivers/virt/vboxguest/vboxguest_core.h | 15 ++-- drivers/virt/vboxguest/vboxguest_linux.c | 26 ++++++- drivers/virt/vboxguest/vboxguest_utils.c | 32 +++++---- drivers/virt/vboxguest/vboxguest_version.h | 9 ++- drivers/virt/vboxguest/vmmdev.h | 8 ++- include/linux/vbox_utils.h | 12 ++-- include/uapi/linux/vbox_vmmdev_types.h | 60 ++++++++++++++++ 8 files changed, 197 insertions(+), 71 deletions(-) diff --git a/drivers/virt/vboxguest/vboxguest_core.c b/drivers/virt/vboxguest/vboxguest_core.c index df7d09409efe..8ca333f21292 100644 --- a/drivers/virt/vboxguest/vboxguest_core.c +++ b/drivers/virt/vboxguest/vboxguest_core.c @@ -27,6 +27,10 @@ #define GUEST_MAPPINGS_TRIES 5 +#define VBG_KERNEL_REQUEST \ + (VMMDEV_REQUESTOR_KERNEL | VMMDEV_REQUESTOR_USR_DRV | \ + VMMDEV_REQUESTOR_CON_DONT_KNOW | VMMDEV_REQUESTOR_TRUST_NOT_GIVEN) + /** * Reserves memory in which the VMM can relocate any guest mappings * that are floating around. @@ -48,7 +52,8 @@ static void vbg_guest_mappings_init(struct vbg_dev *gdev) int i, rc; /* Query the required space. */ - req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_GET_HYPERVISOR_INFO); + req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_GET_HYPERVISOR_INFO, + VBG_KERNEL_REQUEST); if (!req) return; @@ -135,7 +140,8 @@ static void vbg_guest_mappings_exit(struct vbg_dev *gdev) * Tell the host that we're going to free the memory we reserved for * it, the free it up. (Leak the memory if anything goes wrong here.) */ - req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_HYPERVISOR_INFO); + req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_HYPERVISOR_INFO, + VBG_KERNEL_REQUEST); if (!req) return; @@ -172,8 +178,10 @@ static int vbg_report_guest_info(struct vbg_dev *gdev) struct vmmdev_guest_info2 *req2 = NULL; int rc, ret = -ENOMEM; - req1 = vbg_req_alloc(sizeof(*req1), VMMDEVREQ_REPORT_GUEST_INFO); - req2 = vbg_req_alloc(sizeof(*req2), VMMDEVREQ_REPORT_GUEST_INFO2); + req1 = vbg_req_alloc(sizeof(*req1), VMMDEVREQ_REPORT_GUEST_INFO, + VBG_KERNEL_REQUEST); + req2 = vbg_req_alloc(sizeof(*req2), VMMDEVREQ_REPORT_GUEST_INFO2, + VBG_KERNEL_REQUEST); if (!req1 || !req2) goto out_free; @@ -187,8 +195,8 @@ static int vbg_report_guest_info(struct vbg_dev *gdev) req2->additions_minor = VBG_VERSION_MINOR; req2->additions_build = VBG_VERSION_BUILD; req2->additions_revision = VBG_SVN_REV; - /* (no features defined yet) */ - req2->additions_features = 0; + req2->additions_features = + VMMDEV_GUEST_INFO2_ADDITIONS_FEATURES_REQUESTOR_INFO; strlcpy(req2->name, VBG_VERSION_STRING, sizeof(req2->name)); @@ -230,7 +238,8 @@ static int vbg_report_driver_status(struct vbg_dev *gdev, bool active) struct vmmdev_guest_status *req; int rc; - req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_REPORT_GUEST_STATUS); + req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_REPORT_GUEST_STATUS, + VBG_KERNEL_REQUEST); if (!req) return -ENOMEM; @@ -423,7 +432,8 @@ static int vbg_heartbeat_host_config(struct vbg_dev *gdev, bool enabled) struct vmmdev_heartbeat *req; int rc; - req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_HEARTBEAT_CONFIGURE); + req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_HEARTBEAT_CONFIGURE, + VBG_KERNEL_REQUEST); if (!req) return -ENOMEM; @@ -457,7 +467,8 @@ static int vbg_heartbeat_init(struct vbg_dev *gdev) gdev->guest_heartbeat_req = vbg_req_alloc( sizeof(*gdev->guest_heartbeat_req), - VMMDEVREQ_GUEST_HEARTBEAT); + VMMDEVREQ_GUEST_HEARTBEAT, + VBG_KERNEL_REQUEST); if (!gdev->guest_heartbeat_req) return -ENOMEM; @@ -528,7 +539,8 @@ static int vbg_reset_host_event_filter(struct vbg_dev *gdev, struct vmmdev_mask *req; int rc; - req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_CTL_GUEST_FILTER_MASK); + req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_CTL_GUEST_FILTER_MASK, + VBG_KERNEL_REQUEST); if (!req) return -ENOMEM; @@ -567,8 +579,14 @@ static int vbg_set_session_event_filter(struct vbg_dev *gdev, u32 changed, previous; int rc, ret = 0; - /* Allocate a request buffer before taking the spinlock */ - req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_CTL_GUEST_FILTER_MASK); + /* + * Allocate a request buffer before taking the spinlock, when + * the session is being terminated the requestor is the kernel, + * as we're cleaning up. + */ + req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_CTL_GUEST_FILTER_MASK, + session_termination ? VBG_KERNEL_REQUEST : + session->requestor); if (!req) { if (!session_termination) return -ENOMEM; @@ -627,7 +645,8 @@ static int vbg_reset_host_capabilities(struct vbg_dev *gdev) struct vmmdev_mask *req; int rc; - req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_GUEST_CAPABILITIES); + req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_GUEST_CAPABILITIES, + VBG_KERNEL_REQUEST); if (!req) return -ENOMEM; @@ -662,8 +681,14 @@ static int vbg_set_session_capabilities(struct vbg_dev *gdev, u32 changed, previous; int rc, ret = 0; - /* Allocate a request buffer before taking the spinlock */ - req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_GUEST_CAPABILITIES); + /* + * Allocate a request buffer before taking the spinlock, when + * the session is being terminated the requestor is the kernel, + * as we're cleaning up. + */ + req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_GUEST_CAPABILITIES, + session_termination ? VBG_KERNEL_REQUEST : + session->requestor); if (!req) { if (!session_termination) return -ENOMEM; @@ -722,7 +747,8 @@ static int vbg_query_host_version(struct vbg_dev *gdev) struct vmmdev_host_version *req; int rc, ret; - req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_GET_HOST_VERSION); + req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_GET_HOST_VERSION, + VBG_KERNEL_REQUEST); if (!req) return -ENOMEM; @@ -783,19 +809,24 @@ int vbg_core_init(struct vbg_dev *gdev, u32 fixed_events) gdev->mem_balloon.get_req = vbg_req_alloc(sizeof(*gdev->mem_balloon.get_req), - VMMDEVREQ_GET_MEMBALLOON_CHANGE_REQ); + VMMDEVREQ_GET_MEMBALLOON_CHANGE_REQ, + VBG_KERNEL_REQUEST); gdev->mem_balloon.change_req = vbg_req_alloc(sizeof(*gdev->mem_balloon.change_req), - VMMDEVREQ_CHANGE_MEMBALLOON); + VMMDEVREQ_CHANGE_MEMBALLOON, + VBG_KERNEL_REQUEST); gdev->cancel_req = vbg_req_alloc(sizeof(*(gdev->cancel_req)), - VMMDEVREQ_HGCM_CANCEL2); + VMMDEVREQ_HGCM_CANCEL2, + VBG_KERNEL_REQUEST); gdev->ack_events_req = vbg_req_alloc(sizeof(*gdev->ack_events_req), - VMMDEVREQ_ACKNOWLEDGE_EVENTS); + VMMDEVREQ_ACKNOWLEDGE_EVENTS, + VBG_KERNEL_REQUEST); gdev->mouse_status_req = vbg_req_alloc(sizeof(*gdev->mouse_status_req), - VMMDEVREQ_GET_MOUSE_STATUS); + VMMDEVREQ_GET_MOUSE_STATUS, + VBG_KERNEL_REQUEST); if (!gdev->mem_balloon.get_req || !gdev->mem_balloon.change_req || !gdev->cancel_req || !gdev->ack_events_req || @@ -892,9 +923,9 @@ void vbg_core_exit(struct vbg_dev *gdev) * vboxguest_linux.c calls this when userspace opens the char-device. * Return: A pointer to the new session or an ERR_PTR on error. * @gdev: The Guest extension device. - * @user: Set if this is a session for the vboxuser device. + * @requestor: VMMDEV_REQUESTOR_* flags */ -struct vbg_session *vbg_core_open_session(struct vbg_dev *gdev, bool user) +struct vbg_session *vbg_core_open_session(struct vbg_dev *gdev, u32 requestor) { struct vbg_session *session; @@ -903,7 +934,7 @@ struct vbg_session *vbg_core_open_session(struct vbg_dev *gdev, bool user) return ERR_PTR(-ENOMEM); session->gdev = gdev; - session->user_session = user; + session->requestor = requestor; return session; } @@ -924,7 +955,9 @@ void vbg_core_close_session(struct vbg_session *session) if (!session->hgcm_client_ids[i]) continue; - vbg_hgcm_disconnect(gdev, session->hgcm_client_ids[i], &rc); + /* requestor is kernel here, as we're cleaning up. */ + vbg_hgcm_disconnect(gdev, VBG_KERNEL_REQUEST, + session->hgcm_client_ids[i], &rc); } kfree(session); @@ -1152,7 +1185,8 @@ static int vbg_req_allowed(struct vbg_dev *gdev, struct vbg_session *session, return -EPERM; } - if (trusted_apps_only && session->user_session) { + if (trusted_apps_only && + (session->requestor & VMMDEV_REQUESTOR_USER_DEVICE)) { vbg_err("Denying userspace vmm call type %#08x through vboxuser device node\n", req->request_type); return -EPERM; @@ -1209,8 +1243,8 @@ static int vbg_ioctl_hgcm_connect(struct vbg_dev *gdev, if (i >= ARRAY_SIZE(session->hgcm_client_ids)) return -EMFILE; - ret = vbg_hgcm_connect(gdev, &conn->u.in.loc, &client_id, - &conn->hdr.rc); + ret = vbg_hgcm_connect(gdev, session->requestor, &conn->u.in.loc, + &client_id, &conn->hdr.rc); mutex_lock(&gdev->session_mutex); if (ret == 0 && conn->hdr.rc >= 0) { @@ -1251,7 +1285,8 @@ static int vbg_ioctl_hgcm_disconnect(struct vbg_dev *gdev, if (i >= ARRAY_SIZE(session->hgcm_client_ids)) return -EINVAL; - ret = vbg_hgcm_disconnect(gdev, client_id, &disconn->hdr.rc); + ret = vbg_hgcm_disconnect(gdev, session->requestor, client_id, + &disconn->hdr.rc); mutex_lock(&gdev->session_mutex); if (ret == 0 && disconn->hdr.rc >= 0) @@ -1313,12 +1348,12 @@ static int vbg_ioctl_hgcm_call(struct vbg_dev *gdev, } if (IS_ENABLED(CONFIG_COMPAT) && f32bit) - ret = vbg_hgcm_call32(gdev, client_id, + ret = vbg_hgcm_call32(gdev, session->requestor, client_id, call->function, call->timeout_ms, VBG_IOCTL_HGCM_CALL_PARMS32(call), call->parm_count, &call->hdr.rc); else - ret = vbg_hgcm_call(gdev, client_id, + ret = vbg_hgcm_call(gdev, session->requestor, client_id, call->function, call->timeout_ms, VBG_IOCTL_HGCM_CALL_PARMS(call), call->parm_count, &call->hdr.rc); @@ -1408,6 +1443,7 @@ static int vbg_ioctl_check_balloon(struct vbg_dev *gdev, } static int vbg_ioctl_write_core_dump(struct vbg_dev *gdev, + struct vbg_session *session, struct vbg_ioctl_write_coredump *dump) { struct vmmdev_write_core_dump *req; @@ -1415,7 +1451,8 @@ static int vbg_ioctl_write_core_dump(struct vbg_dev *gdev, if (vbg_ioctl_chk(&dump->hdr, sizeof(dump->u.in), 0)) return -EINVAL; - req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_WRITE_COREDUMP); + req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_WRITE_COREDUMP, + session->requestor); if (!req) return -ENOMEM; @@ -1476,7 +1513,7 @@ int vbg_core_ioctl(struct vbg_session *session, unsigned int req, void *data) case VBG_IOCTL_CHECK_BALLOON: return vbg_ioctl_check_balloon(gdev, data); case VBG_IOCTL_WRITE_CORE_DUMP: - return vbg_ioctl_write_core_dump(gdev, data); + return vbg_ioctl_write_core_dump(gdev, session, data); } /* Variable sized requests. */ @@ -1508,7 +1545,8 @@ int vbg_core_set_mouse_status(struct vbg_dev *gdev, u32 features) struct vmmdev_mouse_status *req; int rc; - req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_MOUSE_STATUS); + req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_MOUSE_STATUS, + VBG_KERNEL_REQUEST); if (!req) return -ENOMEM; diff --git a/drivers/virt/vboxguest/vboxguest_core.h b/drivers/virt/vboxguest/vboxguest_core.h index 7ad9ec45bfa9..4188c12b839f 100644 --- a/drivers/virt/vboxguest/vboxguest_core.h +++ b/drivers/virt/vboxguest/vboxguest_core.h @@ -154,15 +154,15 @@ struct vbg_session { * host. Protected by vbg_gdev.session_mutex. */ u32 guest_caps; - /** Does this session belong to a root process or a user one? */ - bool user_session; + /** VMMDEV_REQUESTOR_* flags */ + u32 requestor; /** Set on CANCEL_ALL_WAITEVENTS, protected by vbg_devevent_spinlock. */ bool cancel_waiters; }; int vbg_core_init(struct vbg_dev *gdev, u32 fixed_events); void vbg_core_exit(struct vbg_dev *gdev); -struct vbg_session *vbg_core_open_session(struct vbg_dev *gdev, bool user); +struct vbg_session *vbg_core_open_session(struct vbg_dev *gdev, u32 requestor); void vbg_core_close_session(struct vbg_session *session); int vbg_core_ioctl(struct vbg_session *session, unsigned int req, void *data); int vbg_core_set_mouse_status(struct vbg_dev *gdev, u32 features); @@ -172,12 +172,13 @@ irqreturn_t vbg_core_isr(int irq, void *dev_id); void vbg_linux_mouse_event(struct vbg_dev *gdev); /* Private (non exported) functions form vboxguest_utils.c */ -void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type); +void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type, + u32 requestor); void vbg_req_free(void *req, size_t len); int vbg_req_perform(struct vbg_dev *gdev, void *req); int vbg_hgcm_call32( - struct vbg_dev *gdev, u32 client_id, u32 function, u32 timeout_ms, - struct vmmdev_hgcm_function_parameter32 *parm32, u32 parm_count, - int *vbox_status); + struct vbg_dev *gdev, u32 requestor, u32 client_id, u32 function, + u32 timeout_ms, struct vmmdev_hgcm_function_parameter32 *parm32, + u32 parm_count, int *vbox_status); #endif diff --git a/drivers/virt/vboxguest/vboxguest_linux.c b/drivers/virt/vboxguest/vboxguest_linux.c index 6e2a9619192d..6e8c0f1c1056 100644 --- a/drivers/virt/vboxguest/vboxguest_linux.c +++ b/drivers/virt/vboxguest/vboxguest_linux.c @@ -5,6 +5,7 @@ * Copyright (C) 2006-2016 Oracle Corporation */ +#include #include #include #include @@ -28,6 +29,23 @@ static DEFINE_MUTEX(vbg_gdev_mutex); /** Global vbg_gdev pointer used by vbg_get/put_gdev. */ static struct vbg_dev *vbg_gdev; +static u32 vbg_misc_device_requestor(struct inode *inode) +{ + u32 requestor = VMMDEV_REQUESTOR_USERMODE | + VMMDEV_REQUESTOR_CON_DONT_KNOW | + VMMDEV_REQUESTOR_TRUST_NOT_GIVEN; + + if (from_kuid(current_user_ns(), current->cred->uid) == 0) + requestor |= VMMDEV_REQUESTOR_USR_ROOT; + else + requestor |= VMMDEV_REQUESTOR_USR_USER; + + if (in_egroup_p(inode->i_gid)) + requestor |= VMMDEV_REQUESTOR_GRP_VBOX; + + return requestor; +} + static int vbg_misc_device_open(struct inode *inode, struct file *filp) { struct vbg_session *session; @@ -36,7 +54,7 @@ static int vbg_misc_device_open(struct inode *inode, struct file *filp) /* misc_open sets filp->private_data to our misc device */ gdev = container_of(filp->private_data, struct vbg_dev, misc_device); - session = vbg_core_open_session(gdev, false); + session = vbg_core_open_session(gdev, vbg_misc_device_requestor(inode)); if (IS_ERR(session)) return PTR_ERR(session); @@ -53,7 +71,8 @@ static int vbg_misc_device_user_open(struct inode *inode, struct file *filp) gdev = container_of(filp->private_data, struct vbg_dev, misc_device_user); - session = vbg_core_open_session(gdev, false); + session = vbg_core_open_session(gdev, vbg_misc_device_requestor(inode) | + VMMDEV_REQUESTOR_USER_DEVICE); if (IS_ERR(session)) return PTR_ERR(session); @@ -115,7 +134,8 @@ static long vbg_misc_device_ioctl(struct file *filp, unsigned int req, req == VBG_IOCTL_VMMDEV_REQUEST_BIG; if (is_vmmdev_req) - buf = vbg_req_alloc(size, VBG_IOCTL_HDR_TYPE_DEFAULT); + buf = vbg_req_alloc(size, VBG_IOCTL_HDR_TYPE_DEFAULT, + session->requestor); else buf = kmalloc(size, GFP_KERNEL); if (!buf) diff --git a/drivers/virt/vboxguest/vboxguest_utils.c b/drivers/virt/vboxguest/vboxguest_utils.c index bf4474214b4d..75fd140b02ff 100644 --- a/drivers/virt/vboxguest/vboxguest_utils.c +++ b/drivers/virt/vboxguest/vboxguest_utils.c @@ -62,7 +62,8 @@ VBG_LOG(vbg_err, pr_err); VBG_LOG(vbg_debug, pr_debug); #endif -void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type) +void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type, + u32 requestor) { struct vmmdev_request_header *req; int order = get_order(PAGE_ALIGN(len)); @@ -78,7 +79,7 @@ void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type) req->request_type = req_type; req->rc = VERR_GENERAL_FAILURE; req->reserved1 = 0; - req->reserved2 = 0; + req->requestor = requestor; return req; } @@ -119,7 +120,7 @@ static bool hgcm_req_done(struct vbg_dev *gdev, return done; } -int vbg_hgcm_connect(struct vbg_dev *gdev, +int vbg_hgcm_connect(struct vbg_dev *gdev, u32 requestor, struct vmmdev_hgcm_service_location *loc, u32 *client_id, int *vbox_status) { @@ -127,7 +128,7 @@ int vbg_hgcm_connect(struct vbg_dev *gdev, int rc; hgcm_connect = vbg_req_alloc(sizeof(*hgcm_connect), - VMMDEVREQ_HGCM_CONNECT); + VMMDEVREQ_HGCM_CONNECT, requestor); if (!hgcm_connect) return -ENOMEM; @@ -153,13 +154,15 @@ int vbg_hgcm_connect(struct vbg_dev *gdev, } EXPORT_SYMBOL(vbg_hgcm_connect); -int vbg_hgcm_disconnect(struct vbg_dev *gdev, u32 client_id, int *vbox_status) +int vbg_hgcm_disconnect(struct vbg_dev *gdev, u32 requestor, + u32 client_id, int *vbox_status) { struct vmmdev_hgcm_disconnect *hgcm_disconnect = NULL; int rc; hgcm_disconnect = vbg_req_alloc(sizeof(*hgcm_disconnect), - VMMDEVREQ_HGCM_DISCONNECT); + VMMDEVREQ_HGCM_DISCONNECT, + requestor); if (!hgcm_disconnect) return -ENOMEM; @@ -593,9 +596,10 @@ static int hgcm_call_copy_back_result( return 0; } -int vbg_hgcm_call(struct vbg_dev *gdev, u32 client_id, u32 function, - u32 timeout_ms, struct vmmdev_hgcm_function_parameter *parms, - u32 parm_count, int *vbox_status) +int vbg_hgcm_call(struct vbg_dev *gdev, u32 requestor, u32 client_id, + u32 function, u32 timeout_ms, + struct vmmdev_hgcm_function_parameter *parms, u32 parm_count, + int *vbox_status) { struct vmmdev_hgcm_call *call; void **bounce_bufs = NULL; @@ -615,7 +619,7 @@ int vbg_hgcm_call(struct vbg_dev *gdev, u32 client_id, u32 function, goto free_bounce_bufs; } - call = vbg_req_alloc(size, VMMDEVREQ_HGCM_CALL); + call = vbg_req_alloc(size, VMMDEVREQ_HGCM_CALL, requestor); if (!call) { ret = -ENOMEM; goto free_bounce_bufs; @@ -647,9 +651,9 @@ EXPORT_SYMBOL(vbg_hgcm_call); #ifdef CONFIG_COMPAT int vbg_hgcm_call32( - struct vbg_dev *gdev, u32 client_id, u32 function, u32 timeout_ms, - struct vmmdev_hgcm_function_parameter32 *parm32, u32 parm_count, - int *vbox_status) + struct vbg_dev *gdev, u32 requestor, u32 client_id, u32 function, + u32 timeout_ms, struct vmmdev_hgcm_function_parameter32 *parm32, + u32 parm_count, int *vbox_status) { struct vmmdev_hgcm_function_parameter *parm64 = NULL; u32 i, size; @@ -689,7 +693,7 @@ int vbg_hgcm_call32( goto out_free; } - ret = vbg_hgcm_call(gdev, client_id, function, timeout_ms, + ret = vbg_hgcm_call(gdev, requestor, client_id, function, timeout_ms, parm64, parm_count, vbox_status); if (ret < 0) goto out_free; diff --git a/drivers/virt/vboxguest/vboxguest_version.h b/drivers/virt/vboxguest/vboxguest_version.h index 77f0c8f8a231..84834dad38d5 100644 --- a/drivers/virt/vboxguest/vboxguest_version.h +++ b/drivers/virt/vboxguest/vboxguest_version.h @@ -9,11 +9,10 @@ #ifndef __VBOX_VERSION_H__ #define __VBOX_VERSION_H__ -/* Last synced October 4th 2017 */ -#define VBG_VERSION_MAJOR 5 -#define VBG_VERSION_MINOR 2 +#define VBG_VERSION_MAJOR 6 +#define VBG_VERSION_MINOR 0 #define VBG_VERSION_BUILD 0 -#define VBG_SVN_REV 68940 -#define VBG_VERSION_STRING "5.2.0" +#define VBG_SVN_REV 127566 +#define VBG_VERSION_STRING "6.0.0" #endif diff --git a/drivers/virt/vboxguest/vmmdev.h b/drivers/virt/vboxguest/vmmdev.h index 5e2ae978935d..6337b8d75d96 100644 --- a/drivers/virt/vboxguest/vmmdev.h +++ b/drivers/virt/vboxguest/vmmdev.h @@ -98,8 +98,8 @@ struct vmmdev_request_header { s32 rc; /** Reserved field no.1. MBZ. */ u32 reserved1; - /** Reserved field no.2. MBZ. */ - u32 reserved2; + /** IN: Requestor information (VMMDEV_REQUESTOR_*) */ + u32 requestor; }; VMMDEV_ASSERT_SIZE(vmmdev_request_header, 24); @@ -247,6 +247,8 @@ struct vmmdev_guest_info { }; VMMDEV_ASSERT_SIZE(vmmdev_guest_info, 24 + 8); +#define VMMDEV_GUEST_INFO2_ADDITIONS_FEATURES_REQUESTOR_INFO BIT(0) + /** struct vmmdev_guestinfo2 - Guest information report, version 2. */ struct vmmdev_guest_info2 { /** Header. */ @@ -259,7 +261,7 @@ struct vmmdev_guest_info2 { u32 additions_build; /** SVN revision. */ u32 additions_revision; - /** Feature mask, currently unused. */ + /** Feature mask. */ u32 additions_features; /** * The intentional meaning of this field was: diff --git a/include/linux/vbox_utils.h b/include/linux/vbox_utils.h index a240ed2a0372..ff56c443180c 100644 --- a/include/linux/vbox_utils.h +++ b/include/linux/vbox_utils.h @@ -24,15 +24,17 @@ __printf(1, 2) void vbg_debug(const char *fmt, ...); #define vbg_debug pr_debug #endif -int vbg_hgcm_connect(struct vbg_dev *gdev, +int vbg_hgcm_connect(struct vbg_dev *gdev, u32 requestor, struct vmmdev_hgcm_service_location *loc, u32 *client_id, int *vbox_status); -int vbg_hgcm_disconnect(struct vbg_dev *gdev, u32 client_id, int *vbox_status); +int vbg_hgcm_disconnect(struct vbg_dev *gdev, u32 requestor, + u32 client_id, int *vbox_status); -int vbg_hgcm_call(struct vbg_dev *gdev, u32 client_id, u32 function, - u32 timeout_ms, struct vmmdev_hgcm_function_parameter *parms, - u32 parm_count, int *vbox_status); +int vbg_hgcm_call(struct vbg_dev *gdev, u32 requestor, u32 client_id, + u32 function, u32 timeout_ms, + struct vmmdev_hgcm_function_parameter *parms, u32 parm_count, + int *vbox_status); /** * Convert a VirtualBox status code to a standard Linux kernel return value. diff --git a/include/uapi/linux/vbox_vmmdev_types.h b/include/uapi/linux/vbox_vmmdev_types.h index 0e68024f36c7..26f39816af14 100644 --- a/include/uapi/linux/vbox_vmmdev_types.h +++ b/include/uapi/linux/vbox_vmmdev_types.h @@ -102,6 +102,66 @@ enum vmmdev_request_type { #define VMMDEVREQ_HGCM_CALL VMMDEVREQ_HGCM_CALL32 #endif +/* vmmdev_request_header.requestor defines */ + +/* Requestor user not given. */ +#define VMMDEV_REQUESTOR_USR_NOT_GIVEN 0x00000000 +/* The kernel driver (vboxguest) is the requestor. */ +#define VMMDEV_REQUESTOR_USR_DRV 0x00000001 +/* Some other kernel driver is the requestor. */ +#define VMMDEV_REQUESTOR_USR_DRV_OTHER 0x00000002 +/* The root or a admin user is the requestor. */ +#define VMMDEV_REQUESTOR_USR_ROOT 0x00000003 +/* Regular joe user is making the request. */ +#define VMMDEV_REQUESTOR_USR_USER 0x00000006 +/* User classification mask. */ +#define VMMDEV_REQUESTOR_USR_MASK 0x00000007 + +/* Kernel mode request. Note this is 0, check for !USERMODE instead. */ +#define VMMDEV_REQUESTOR_KERNEL 0x00000000 +/* User mode request. */ +#define VMMDEV_REQUESTOR_USERMODE 0x00000008 +/* User or kernel mode classification mask. */ +#define VMMDEV_REQUESTOR_MODE_MASK 0x00000008 + +/* Don't know the physical console association of the requestor. */ +#define VMMDEV_REQUESTOR_CON_DONT_KNOW 0x00000000 +/* + * The request originates with a process that is NOT associated with the + * physical console. + */ +#define VMMDEV_REQUESTOR_CON_NO 0x00000010 +/* Requestor process is associated with the physical console. */ +#define VMMDEV_REQUESTOR_CON_YES 0x00000020 +/* Console classification mask. */ +#define VMMDEV_REQUESTOR_CON_MASK 0x00000030 + +/* Requestor is member of special VirtualBox user group. */ +#define VMMDEV_REQUESTOR_GRP_VBOX 0x00000080 + +/* Note: trust level is for windows guests only, linux always uses not-given */ +/* Requestor trust level: Unspecified */ +#define VMMDEV_REQUESTOR_TRUST_NOT_GIVEN 0x00000000 +/* Requestor trust level: Untrusted (SID S-1-16-0) */ +#define VMMDEV_REQUESTOR_TRUST_UNTRUSTED 0x00001000 +/* Requestor trust level: Untrusted (SID S-1-16-4096) */ +#define VMMDEV_REQUESTOR_TRUST_LOW 0x00002000 +/* Requestor trust level: Medium (SID S-1-16-8192) */ +#define VMMDEV_REQUESTOR_TRUST_MEDIUM 0x00003000 +/* Requestor trust level: Medium plus (SID S-1-16-8448) */ +#define VMMDEV_REQUESTOR_TRUST_MEDIUM_PLUS 0x00004000 +/* Requestor trust level: High (SID S-1-16-12288) */ +#define VMMDEV_REQUESTOR_TRUST_HIGH 0x00005000 +/* Requestor trust level: System (SID S-1-16-16384) */ +#define VMMDEV_REQUESTOR_TRUST_SYSTEM 0x00006000 +/* Requestor trust level >= Protected (SID S-1-16-20480, S-1-16-28672) */ +#define VMMDEV_REQUESTOR_TRUST_PROTECTED 0x00007000 +/* Requestor trust level mask */ +#define VMMDEV_REQUESTOR_TRUST_MASK 0x00007000 + +/* Requestor is using the less trusted user device node (/dev/vboxuser) */ +#define VMMDEV_REQUESTOR_USER_DEVICE 0x00008000 + /** HGCM service location types. */ enum vmmdev_hgcm_service_location_type { VMMDEV_HGCM_LOC_INVALID = 0, -- cgit From daf5cc27eed99afdea8d96e71b89ba41f5406ef6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Mar 2019 01:38:58 +0000 Subject: ceph: fix use-after-free on symlink traversal free the symlink body after the same RCU delay we have for freeing the struct inode itself, so that traversal during RCU pathwalk wouldn't step into freed memory. Signed-off-by: Al Viro Reviewed-by: Jeff Layton Signed-off-by: Ilya Dryomov --- fs/ceph/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index e3346628efe2..2d61ddda9bf5 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -524,6 +524,7 @@ static void ceph_i_callback(struct rcu_head *head) struct inode *inode = container_of(head, struct inode, i_rcu); struct ceph_inode_info *ci = ceph_inode(inode); + kfree(ci->i_symlink); kmem_cache_free(ceph_inode_cachep, ci); } @@ -566,7 +567,6 @@ void ceph_destroy_inode(struct inode *inode) } } - kfree(ci->i_symlink); while ((n = rb_first(&ci->i_fragtree)) != NULL) { frag = rb_entry(n, struct ceph_inode_frag, node); rb_erase(n, &ci->i_fragtree); -- cgit From 9e0a17db517d83d568bb7fa983b54759d4e34f1f Mon Sep 17 00:00:00 2001 From: Chen Zhou Date: Wed, 27 Mar 2019 21:51:16 +0800 Subject: arm64: replace memblock_alloc_low with memblock_alloc If we use "crashkernel=Y[@X]" and the start address is above 4G, the arm64 kdump capture kernel may call memblock_alloc_low() failure in request_standard_resources(). Replacing memblock_alloc_low() with memblock_alloc(). [ 0.000000] MEMBLOCK configuration: [ 0.000000] memory size = 0x0000000040650000 reserved size = 0x0000000004db7f39 [ 0.000000] memory.cnt = 0x6 [ 0.000000] memory[0x0] [0x00000000395f0000-0x000000003968ffff], 0x00000000000a0000 bytes on node 0 flags: 0x4 [ 0.000000] memory[0x1] [0x0000000039730000-0x000000003973ffff], 0x0000000000010000 bytes on node 0 flags: 0x4 [ 0.000000] memory[0x2] [0x0000000039780000-0x000000003986ffff], 0x00000000000f0000 bytes on node 0 flags: 0x4 [ 0.000000] memory[0x3] [0x0000000039890000-0x0000000039d0ffff], 0x0000000000480000 bytes on node 0 flags: 0x4 [ 0.000000] memory[0x4] [0x000000003ed00000-0x000000003ed2ffff], 0x0000000000030000 bytes on node 0 flags: 0x4 [ 0.000000] memory[0x5] [0x0000002040000000-0x000000207fffffff], 0x0000000040000000 bytes on node 0 flags: 0x0 [ 0.000000] reserved.cnt = 0x7 [ 0.000000] reserved[0x0] [0x0000002040080000-0x0000002041c4dfff], 0x0000000001bce000 bytes flags: 0x0 [ 0.000000] reserved[0x1] [0x0000002041c53000-0x0000002042c203f8], 0x0000000000fcd3f9 bytes flags: 0x0 [ 0.000000] reserved[0x2] [0x000000207da00000-0x000000207dbfffff], 0x0000000000200000 bytes flags: 0x0 [ 0.000000] reserved[0x3] [0x000000207ddef000-0x000000207fbfffff], 0x0000000001e11000 bytes flags: 0x0 [ 0.000000] reserved[0x4] [0x000000207fdf2b00-0x000000207fdfc03f], 0x0000000000009540 bytes flags: 0x0 [ 0.000000] reserved[0x5] [0x000000207fdfd000-0x000000207ffff3ff], 0x0000000000202400 bytes flags: 0x0 [ 0.000000] reserved[0x6] [0x000000207ffffe00-0x000000207fffffff], 0x0000000000000200 bytes flags: 0x0 [ 0.000000] Kernel panic - not syncing: request_standard_resources: Failed to allocate 384 bytes [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 5.1.0-next-20190321+ #4 [ 0.000000] Call trace: [ 0.000000] dump_backtrace+0x0/0x188 [ 0.000000] show_stack+0x24/0x30 [ 0.000000] dump_stack+0xa8/0xcc [ 0.000000] panic+0x14c/0x31c [ 0.000000] setup_arch+0x2b0/0x5e0 [ 0.000000] start_kernel+0x90/0x52c [ 0.000000] ---[ end Kernel panic - not syncing: request_standard_resources: Failed to allocate 384 bytes ]--- Link: https://www.spinics.net/lists/arm-kernel/msg715293.html Signed-off-by: Chen Zhou Signed-off-by: Catalin Marinas --- arch/arm64/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index f8482fe5a190..413d566405d1 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -217,7 +217,7 @@ static void __init request_standard_resources(void) num_standard_resources = memblock.memory.cnt; res_size = num_standard_resources * sizeof(*standard_resources); - standard_resources = memblock_alloc_low(res_size, SMP_CACHE_BYTES); + standard_resources = memblock_alloc(res_size, SMP_CACHE_BYTES); if (!standard_resources) panic("%s: Failed to allocate %zu bytes\n", __func__, res_size); -- cgit From 70fc085c5015c54a7b8742a45fc9ab05d6da90da Mon Sep 17 00:00:00 2001 From: zhengbin Date: Fri, 22 Mar 2019 10:56:46 +0800 Subject: scsi: core: Run queue when state is set to running after being blocked Use dd to test a SCSI device: 1. echo "blocked" >/sys/block/sda/device/state 2. dd if=/dev/sda of=/mnt/t.log bs=1M count=10 3. echo "running" >/sys/block/sda/device/state dd should finish this work after step 3, but it hangs. After step2, the call chain is this: blk_mq_dispatch_rq_list-->scsi_queue_rq-->prep_to_mq prep_to_mq will return BLK_STS_RESOURCE, and scsi_queue_rq will transition it to BLK_STS_DEV_RESOURCE which means that driver can guarantee that IO dispatch will be triggered in future when the resource is available. Need to follow the rule if we set the device state to running. [mkp: tweaked commit description and code comment as suggested by Bart] Signed-off-by: zhengbin Reviewed-by: Ming Lei Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/scsi_sysfs.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 6a9040faed00..3b119ca0cc0c 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -771,6 +771,12 @@ store_state_field(struct device *dev, struct device_attribute *attr, mutex_lock(&sdev->state_mutex); ret = scsi_device_set_state(sdev, state); + /* + * If the device state changes to SDEV_RUNNING, we need to run + * the queue to avoid I/O hang. + */ + if (ret == 0 && state == SDEV_RUNNING) + blk_mq_run_hw_queues(sdev->request_queue, true); mutex_unlock(&sdev->state_mutex); return ret == 0 ? count : -EINVAL; -- cgit From c14a57264399efd39514a2329c591a4b954246d8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 25 Mar 2019 10:01:46 -0700 Subject: scsi: sd: Fix a race between closing an sd device and sd I/O The scsi_end_request() function calls scsi_cmd_to_driver() indirectly and hence needs the disk->private_data pointer. Avoid that that pointer is cleared before all affected I/O requests have finished. This patch avoids that the following crash occurs: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000 Call trace: scsi_mq_uninit_cmd+0x1c/0x30 scsi_end_request+0x7c/0x1b8 scsi_io_completion+0x464/0x668 scsi_finish_command+0xbc/0x160 scsi_eh_flush_done_q+0x10c/0x170 sas_scsi_recover_host+0x84c/0xa98 [libsas] scsi_error_handler+0x140/0x5b0 kthread+0x100/0x12c ret_from_fork+0x10/0x18 Cc: Christoph Hellwig Cc: Ming Lei Cc: Hannes Reinecke Cc: Johannes Thumshirn Cc: Jason Yan Cc: Signed-off-by: Bart Van Assche Reported-by: Jason Yan Reviewed-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 251db30d0882..3ffa34dc2fd5 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1415,11 +1415,6 @@ static void sd_release(struct gendisk *disk, fmode_t mode) scsi_set_medium_removal(sdev, SCSI_REMOVAL_ALLOW); } - /* - * XXX and what if there are packets in flight and this close() - * XXX is followed by a "rmmod sd_mod"? - */ - scsi_disk_put(sdkp); } @@ -3505,9 +3500,21 @@ static void scsi_disk_release(struct device *dev) { struct scsi_disk *sdkp = to_scsi_disk(dev); struct gendisk *disk = sdkp->disk; - + struct request_queue *q = disk->queue; + ida_free(&sd_index_ida, sdkp->index); + /* + * Wait until all requests that are in progress have completed. + * This is necessary to avoid that e.g. scsi_end_request() crashes + * due to clearing the disk->private_data pointer. Wait from inside + * scsi_disk_release() instead of from sd_release() to avoid that + * freezing and unfreezing the request queue affects user space I/O + * in case multiple processes open a /dev/sd... node concurrently. + */ + blk_mq_freeze_queue(q); + blk_mq_unfreeze_queue(q); + disk->private_data = NULL; put_disk(disk); put_device(&sdkp->device->sdev_gendev); -- cgit From 1d5de5bd311be7cd54f02f7cd164f0349a75c876 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 27 Mar 2019 12:11:52 -0400 Subject: scsi: sd: Quiesce warning if device does not report optimal I/O size Commit a83da8a4509d ("scsi: sd: Optimal I/O size should be a multiple of physical block size") split one conditional into several separate statements in an effort to provide more accurate warning messages when a device reports a nonsensical value. However, this reorganization accidentally dropped the precondition of the reported value being larger than zero. This lead to a warning getting emitted on devices that do not report an optimal I/O size at all. Remain silent if a device does not report an optimal I/O size. Fixes: a83da8a4509d ("scsi: sd: Optimal I/O size should be a multiple of physical block size") Cc: Randy Dunlap Cc: Reported-by: Hussam Al-Tayeb Tested-by: Hussam Al-Tayeb Reviewed-by: Bart Van Assche Signed-off-by: Martin K. Petersen --- drivers/scsi/sd.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 3ffa34dc2fd5..2b2bc4b49d78 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -3071,6 +3071,9 @@ static bool sd_validate_opt_xfer_size(struct scsi_disk *sdkp, unsigned int opt_xfer_bytes = logical_to_bytes(sdp, sdkp->opt_xfer_blocks); + if (sdkp->opt_xfer_blocks == 0) + return false; + if (sdkp->opt_xfer_blocks > dev_max) { sd_first_printk(KERN_WARNING, sdkp, "Optimal transfer size %u logical blocks " \ -- cgit From fe67888fc007a76b81e37da23ce5bd8fb95890b0 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Tue, 26 Mar 2019 14:36:58 +0100 Subject: scsi: zfcp: fix rport unblock if deleted SCSI devices on Scsi_Host An already deleted SCSI device can exist on the Scsi_Host and remain there because something still holds a reference. A new SCSI device with the same H:C:T:L and FCP device, target port WWPN, and FCP LUN can be created. When we try to unblock an rport, we still find the deleted SCSI device and return early because the zfcp_scsi_dev of that SCSI device is not ZFCP_STATUS_COMMON_UNBLOCKED. Hence we miss to unblock the rport, even if the new proper SCSI device would be in good state. Therefore, skip deleted SCSI devices when iterating the sdevs of the shost. [cf. __scsi_device_lookup{_by_target}() or scsi_device_get()] The following abbreviated trace sequence can indicate such problem: Area : REC Tag : ersfs_3 LUN : 0x4045400300000000 WWPN : 0x50050763031bd327 LUN status : 0x40000000 not ZFCP_STATUS_COMMON_UNBLOCKED Ready count : n not incremented yet Running count : 0x00000000 ERP want : 0x01 ERP need : 0xc1 ZFCP_ERP_ACTION_NONE Area : REC Tag : ersfs_3 LUN : 0x4045400300000000 WWPN : 0x50050763031bd327 LUN status : 0x41000000 Ready count : n+1 Running count : 0x00000000 ERP want : 0x01 ERP need : 0x01 ... Area : REC Level : 4 only with increased trace level Tag : ertru_l LUN : 0x4045400300000000 WWPN : 0x50050763031bd327 LUN status : 0x40000000 Request ID : 0x0000000000000000 ERP status : 0x01800000 ERP step : 0x1000 ERP action : 0x01 ERP count : 0x00 NOT followed by a trace record with tag "scpaddy" for WWPN 0x50050763031bd327. Signed-off-by: Steffen Maier Fixes: 6f2ce1c6af37 ("scsi: zfcp: fix rport unblock race with LUN recovery") Cc: #2.6.32+ Reviewed-by: Jens Remus Reviewed-by: Benjamin Block Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_erp.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index 744a64680d5b..c0b2348d7ce6 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -1341,6 +1341,9 @@ static void zfcp_erp_try_rport_unblock(struct zfcp_port *port) struct zfcp_scsi_dev *zsdev = sdev_to_zfcp(sdev); int lun_status; + if (sdev->sdev_state == SDEV_DEL || + sdev->sdev_state == SDEV_CANCEL) + continue; if (zsdev->port != port) continue; /* LUN under port of interest */ -- cgit From 242ec1455151267fe35a0834aa9038e4c4670884 Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Tue, 26 Mar 2019 14:36:59 +0100 Subject: scsi: zfcp: fix scsi_eh host reset with port_forced ERP for non-NPIV FCP devices Suppose more than one non-NPIV FCP device is active on the same channel. Send I/O to storage and have some of the pending I/O run into a SCSI command timeout, e.g. due to bit errors on the fibre. Now the error situation stops. However, we saw FCP requests continue to timeout in the channel. The abort will be successful, but the subsequent TUR fails. Scsi_eh starts. The LUN reset fails. The target reset fails. The host reset only did an FCP device recovery. However, for non-NPIV FCP devices, this does not close and reopen ports on the SAN-side if other non-NPIV FCP device(s) share the same open ports. In order to resolve the continuing FCP request timeouts, we need to explicitly close and reopen ports on the SAN-side. This was missing since the beginning of zfcp in v2.6.0 history commit ea127f975424 ("[PATCH] s390 (7/7): zfcp host adapter."). Note: The FSF requests for forced port reopen could run into FSF request timeouts due to other reasons. This would trigger an internal FCP device recovery. Pending forced port reopen recoveries would get dismissed. So some ports might not get fully reopened during this host reset handler. However, subsequent I/O would trigger the above described escalation and eventually all ports would be forced reopen to resolve any continuing FCP request timeouts due to earlier bit errors. Signed-off-by: Steffen Maier Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Cc: #3.0+ Reviewed-by: Jens Remus Reviewed-by: Benjamin Block Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_erp.c | 14 ++++++++++++++ drivers/s390/scsi/zfcp_ext.h | 2 ++ drivers/s390/scsi/zfcp_scsi.c | 4 ++++ 3 files changed, 20 insertions(+) diff --git a/drivers/s390/scsi/zfcp_erp.c b/drivers/s390/scsi/zfcp_erp.c index c0b2348d7ce6..e8fc28dba8df 100644 --- a/drivers/s390/scsi/zfcp_erp.c +++ b/drivers/s390/scsi/zfcp_erp.c @@ -624,6 +624,20 @@ static void zfcp_erp_strategy_memwait(struct zfcp_erp_action *erp_action) add_timer(&erp_action->timer); } +void zfcp_erp_port_forced_reopen_all(struct zfcp_adapter *adapter, + int clear, char *dbftag) +{ + unsigned long flags; + struct zfcp_port *port; + + write_lock_irqsave(&adapter->erp_lock, flags); + read_lock(&adapter->port_list_lock); + list_for_each_entry(port, &adapter->port_list, list) + _zfcp_erp_port_forced_reopen(port, clear, dbftag); + read_unlock(&adapter->port_list_lock); + write_unlock_irqrestore(&adapter->erp_lock, flags); +} + static void _zfcp_erp_port_reopen_all(struct zfcp_adapter *adapter, int clear, char *dbftag) { diff --git a/drivers/s390/scsi/zfcp_ext.h b/drivers/s390/scsi/zfcp_ext.h index 3fce47b0b21b..c6acca521ffe 100644 --- a/drivers/s390/scsi/zfcp_ext.h +++ b/drivers/s390/scsi/zfcp_ext.h @@ -70,6 +70,8 @@ extern void zfcp_erp_port_reopen(struct zfcp_port *port, int clear, char *dbftag); extern void zfcp_erp_port_shutdown(struct zfcp_port *, int, char *); extern void zfcp_erp_port_forced_reopen(struct zfcp_port *, int, char *); +extern void zfcp_erp_port_forced_reopen_all(struct zfcp_adapter *adapter, + int clear, char *dbftag); extern void zfcp_erp_set_lun_status(struct scsi_device *, u32); extern void zfcp_erp_clear_lun_status(struct scsi_device *, u32); extern void zfcp_erp_lun_reopen(struct scsi_device *, int, char *); diff --git a/drivers/s390/scsi/zfcp_scsi.c b/drivers/s390/scsi/zfcp_scsi.c index f4f6a07c5222..221d0dfb8493 100644 --- a/drivers/s390/scsi/zfcp_scsi.c +++ b/drivers/s390/scsi/zfcp_scsi.c @@ -368,6 +368,10 @@ static int zfcp_scsi_eh_host_reset_handler(struct scsi_cmnd *scpnt) struct zfcp_adapter *adapter = zfcp_sdev->port->adapter; int ret = SUCCESS, fc_ret; + if (!(adapter->connection_features & FSF_FEATURE_NPIV_MODE)) { + zfcp_erp_port_forced_reopen_all(adapter, 0, "schrh_p"); + zfcp_erp_wait(adapter); + } zfcp_erp_adapter_reopen(adapter, 0, "schrh_1"); zfcp_erp_wait(adapter); fc_ret = fc_block_scsi_eh(scpnt); -- cgit From c8206579175c34a2546de8a74262456278a7795a Mon Sep 17 00:00:00 2001 From: Steffen Maier Date: Tue, 26 Mar 2019 14:37:00 +0100 Subject: scsi: zfcp: reduce flood of fcrscn1 trace records on multi-element RSCN If an incoming ELS of type RSCN contains more than one element, zfcp suboptimally causes repeated erp trigger NOP trace records for each previously failed port. These could be ports that went away. It loops over each RSCN element, and for each of those in an inner loop over all zfcp_ports. The trigger to recover failed ports should be just the reception of some RSCN, no matter how many elements it has. So we can loop over failed ports separately, and only then loop over each RSCN element to handle the non-failed ports. The call chain was: zfcp_fc_incoming_rscn for (i = 1; i < no_entries; i++) _zfcp_fc_incoming_rscn list_for_each_entry(port, &adapter->port_list, list) if (masked port->d_id match) zfcp_fc_test_link if (!port->d_id) zfcp_erp_port_reopen "fcrscn1" <=== In order the reduce the "flooding" of the REC trace area in such cases, we factor out handling the failed ports to be outside of the entries loop: zfcp_fc_incoming_rscn if (no_entries > 1) <=== list_for_each_entry(port, &adapter->port_list, list) <=== if (!port->d_id) zfcp_erp_port_reopen "fcrscn1" <=== for (i = 1; i < no_entries; i++) _zfcp_fc_incoming_rscn list_for_each_entry(port, &adapter->port_list, list) if (masked port->d_id match) zfcp_fc_test_link Abbreviated example trace records before this code change: Tag : fcrscn1 WWPN : 0x500507630310d327 ERP want : 0x02 ERP need : 0x02 Tag : fcrscn1 WWPN : 0x500507630310d327 ERP want : 0x02 ERP need : 0x00 NOP => superfluous trace record The last trace entry repeats if there are more than 2 RSCN elements. Signed-off-by: Steffen Maier Reviewed-by: Benjamin Block Reviewed-by: Jens Remus Signed-off-by: Martin K. Petersen --- drivers/s390/scsi/zfcp_fc.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/s390/scsi/zfcp_fc.c b/drivers/s390/scsi/zfcp_fc.c index db00b5e3abbe..33eddb02ee30 100644 --- a/drivers/s390/scsi/zfcp_fc.c +++ b/drivers/s390/scsi/zfcp_fc.c @@ -239,10 +239,6 @@ static void _zfcp_fc_incoming_rscn(struct zfcp_fsf_req *fsf_req, u32 range, list_for_each_entry(port, &adapter->port_list, list) { if ((port->d_id & range) == (ntoh24(page->rscn_fid) & range)) zfcp_fc_test_link(port); - if (!port->d_id) - zfcp_erp_port_reopen(port, - ZFCP_STATUS_COMMON_ERP_FAILED, - "fcrscn1"); } read_unlock_irqrestore(&adapter->port_list_lock, flags); } @@ -250,6 +246,7 @@ static void _zfcp_fc_incoming_rscn(struct zfcp_fsf_req *fsf_req, u32 range, static void zfcp_fc_incoming_rscn(struct zfcp_fsf_req *fsf_req) { struct fsf_status_read_buffer *status_buffer = (void *)fsf_req->data; + struct zfcp_adapter *adapter = fsf_req->adapter; struct fc_els_rscn *head; struct fc_els_rscn_page *page; u16 i; @@ -263,6 +260,22 @@ static void zfcp_fc_incoming_rscn(struct zfcp_fsf_req *fsf_req) no_entries = be16_to_cpu(head->rscn_plen) / sizeof(struct fc_els_rscn_page); + if (no_entries > 1) { + /* handle failed ports */ + unsigned long flags; + struct zfcp_port *port; + + read_lock_irqsave(&adapter->port_list_lock, flags); + list_for_each_entry(port, &adapter->port_list, list) { + if (port->d_id) + continue; + zfcp_erp_port_reopen(port, + ZFCP_STATUS_COMMON_ERP_FAILED, + "fcrscn1"); + } + read_unlock_irqrestore(&adapter->port_list_lock, flags); + } + for (i = 1; i < no_entries; i++) { /* skip head and start with 1st element */ page++; -- cgit From 6dc6a944d58aea3d9de3828318b0fffdb60a7097 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Wed, 20 Mar 2019 14:56:51 -0500 Subject: scsi: ibmvfc: Remove "failed" from logged errors The text of messages logged with ibmvfc_log_error() always contain the term "failed". In the case of cancelled commands during EH they are reported back by the VIOS using error codes. This can be confusing to somebody looking at these log messages as to whether a command was successfully cancelled. The following real log message for example it is unclear if the transaction was actaully cancelled. <6>sd 0:0:1:1: Cancelling outstanding commands. <3>sd 0:0:1:1: [sde] Command (28) failed: transaction cancelled (2:6) flags: 0 fcp_rsp: 0, resid=0, scsi_status: 0 Remove prefixing of "failed" to all error logged messages. The ibmvfc_log_error() function translates the returned error/status codes to a human readable message already. Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index dbaa4f131433..c3ce27039552 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -1494,7 +1494,7 @@ static void ibmvfc_log_error(struct ibmvfc_event *evt) if (rsp->flags & FCP_RSP_LEN_VALID) rsp_code = rsp->data.info.rsp_code; - scmd_printk(KERN_ERR, cmnd, "Command (%02X) failed: %s (%x:%x) " + scmd_printk(KERN_ERR, cmnd, "Command (%02X) : %s (%x:%x) " "flags: %x fcp_rsp: %x, resid=%d, scsi_status: %x\n", cmnd->cmnd[0], err, vfc_cmd->status, vfc_cmd->error, rsp->flags, rsp_code, scsi_get_resid(cmnd), rsp->scsi_status); -- cgit From 95237c25d8d08ebc451dd2d793f7e765f57b0c9f Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Wed, 20 Mar 2019 14:56:52 -0500 Subject: scsi: ibmvfc: Add failed PRLI to cmd_status lookup array The VIOS uses the SCSI_ERROR class to report PRLI failures. These errors are indicated with the combination of a IBMVFC_FC_SCSI_ERROR return status and 0x8000 error code. Add these codes to cmd_status[] with appropriate human readable error message. Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index c3ce27039552..18ee2a8ec3d5 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -139,6 +139,7 @@ static const struct { { IBMVFC_FC_FAILURE, IBMVFC_VENDOR_SPECIFIC, DID_ERROR, 1, 1, "vendor specific" }, { IBMVFC_FC_SCSI_ERROR, 0, DID_OK, 1, 0, "SCSI error" }, + { IBMVFC_FC_SCSI_ERROR, IBMVFC_COMMAND_FAILED, DID_ERROR, 0, 1, "PRLI to device failed." }, }; static void ibmvfc_npiv_login(struct ibmvfc_host *); -- cgit From 3e6f7de43f4960fba8322b16531b0d6624a9322d Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Wed, 20 Mar 2019 14:56:53 -0500 Subject: scsi: ibmvfc: Byte swap status and error codes when logging Status and error codes are returned in big endian from the VIOS. The values are translated into a human readable format when logged, but the values are also logged. This patch byte swaps those values so that they are consistent between BE and LE platforms. Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 18ee2a8ec3d5..33dda4d32f65 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -1497,7 +1497,7 @@ static void ibmvfc_log_error(struct ibmvfc_event *evt) scmd_printk(KERN_ERR, cmnd, "Command (%02X) : %s (%x:%x) " "flags: %x fcp_rsp: %x, resid=%d, scsi_status: %x\n", - cmnd->cmnd[0], err, vfc_cmd->status, vfc_cmd->error, + cmnd->cmnd[0], err, be16_to_cpu(vfc_cmd->status), be16_to_cpu(vfc_cmd->error), rsp->flags, rsp_code, scsi_get_resid(cmnd), rsp->scsi_status); } @@ -2023,7 +2023,7 @@ static int ibmvfc_reset_device(struct scsi_device *sdev, int type, char *desc) sdev_printk(KERN_ERR, sdev, "%s reset failed: %s (%x:%x) " "flags: %x fcp_rsp: %x, scsi_status: %x\n", desc, ibmvfc_get_cmd_error(be16_to_cpu(rsp_iu.cmd.status), be16_to_cpu(rsp_iu.cmd.error)), - rsp_iu.cmd.status, rsp_iu.cmd.error, fc_rsp->flags, rsp_code, + be16_to_cpu(rsp_iu.cmd.status), be16_to_cpu(rsp_iu.cmd.error), fc_rsp->flags, rsp_code, fc_rsp->scsi_status); rsp_rc = -EIO; } else @@ -2382,7 +2382,7 @@ static int ibmvfc_abort_task_set(struct scsi_device *sdev) sdev_printk(KERN_ERR, sdev, "Abort failed: %s (%x:%x) " "flags: %x fcp_rsp: %x, scsi_status: %x\n", ibmvfc_get_cmd_error(be16_to_cpu(rsp_iu.cmd.status), be16_to_cpu(rsp_iu.cmd.error)), - rsp_iu.cmd.status, rsp_iu.cmd.error, fc_rsp->flags, rsp_code, + be16_to_cpu(rsp_iu.cmd.status), be16_to_cpu(rsp_iu.cmd.error), fc_rsp->flags, rsp_code, fc_rsp->scsi_status); rsp_rc = -EIO; } else @@ -3349,7 +3349,7 @@ static void ibmvfc_tgt_prli_done(struct ibmvfc_event *evt) tgt_log(tgt, level, "Process Login failed: %s (%x:%x) rc=0x%02X\n", ibmvfc_get_cmd_error(be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)), - rsp->status, rsp->error, status); + be16_to_cpu(rsp->status), be16_to_cpu(rsp->error), status); break; } @@ -3447,9 +3447,10 @@ static void ibmvfc_tgt_plogi_done(struct ibmvfc_event *evt) ibmvfc_set_tgt_action(tgt, IBMVFC_TGT_ACTION_DEL_RPORT); tgt_log(tgt, level, "Port Login failed: %s (%x:%x) %s (%x) %s (%x) rc=0x%02X\n", - ibmvfc_get_cmd_error(be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)), rsp->status, rsp->error, - ibmvfc_get_fc_type(be16_to_cpu(rsp->fc_type)), rsp->fc_type, - ibmvfc_get_ls_explain(be16_to_cpu(rsp->fc_explain)), rsp->fc_explain, status); + ibmvfc_get_cmd_error(be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)), + be16_to_cpu(rsp->status), be16_to_cpu(rsp->error), + ibmvfc_get_fc_type(be16_to_cpu(rsp->fc_type)), be16_to_cpu(rsp->fc_type), + ibmvfc_get_ls_explain(be16_to_cpu(rsp->fc_explain)), be16_to_cpu(rsp->fc_explain), status); break; } @@ -3620,7 +3621,7 @@ static void ibmvfc_tgt_adisc_done(struct ibmvfc_event *evt) fc_explain = (be32_to_cpu(mad->fc_iu.response[1]) & 0x0000ff00) >> 8; tgt_info(tgt, "ADISC failed: %s (%x:%x) %s (%x) %s (%x) rc=0x%02X\n", ibmvfc_get_cmd_error(be16_to_cpu(mad->iu.status), be16_to_cpu(mad->iu.error)), - mad->iu.status, mad->iu.error, + be16_to_cpu(mad->iu.status), be16_to_cpu(mad->iu.error), ibmvfc_get_fc_type(fc_reason), fc_reason, ibmvfc_get_ls_explain(fc_explain), fc_explain, status); break; @@ -3832,9 +3833,10 @@ static void ibmvfc_tgt_query_target_done(struct ibmvfc_event *evt) tgt_log(tgt, level, "Query Target failed: %s (%x:%x) %s (%x) %s (%x) rc=0x%02X\n", ibmvfc_get_cmd_error(be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)), - rsp->status, rsp->error, ibmvfc_get_fc_type(be16_to_cpu(rsp->fc_type)), - rsp->fc_type, ibmvfc_get_gs_explain(be16_to_cpu(rsp->fc_explain)), - rsp->fc_explain, status); + be16_to_cpu(rsp->status), be16_to_cpu(rsp->error), + ibmvfc_get_fc_type(be16_to_cpu(rsp->fc_type)), be16_to_cpu(rsp->fc_type), + ibmvfc_get_gs_explain(be16_to_cpu(rsp->fc_explain)), be16_to_cpu(rsp->fc_explain), + status); break; } @@ -3960,7 +3962,7 @@ static void ibmvfc_discover_targets_done(struct ibmvfc_event *evt) level += ibmvfc_retry_host_init(vhost); ibmvfc_log(vhost, level, "Discover Targets failed: %s (%x:%x)\n", ibmvfc_get_cmd_error(be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)), - rsp->status, rsp->error); + be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)); break; case IBMVFC_MAD_DRIVER_FAILED: break; @@ -4025,7 +4027,7 @@ static void ibmvfc_npiv_login_done(struct ibmvfc_event *evt) ibmvfc_link_down(vhost, IBMVFC_LINK_DEAD); ibmvfc_log(vhost, level, "NPIV Login failed: %s (%x:%x)\n", ibmvfc_get_cmd_error(be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)), - rsp->status, rsp->error); + be16_to_cpu(rsp->status), be16_to_cpu(rsp->error)); ibmvfc_free_event(evt); return; case IBMVFC_MAD_CRQ_ERROR: -- cgit From d6e2635b9cf7982102750c5d9e4ba1474afa0981 Mon Sep 17 00:00:00 2001 From: Tyrel Datwyler Date: Wed, 20 Mar 2019 14:56:54 -0500 Subject: scsi: ibmvfc: Clean up transport events No change to functionality. Simply make transport event messages a little clearer, and rework CRQ format enums such that we have separate enums for INIT messages and XPORT events. [mkp: typo] Signed-off-by: Tyrel Datwyler Signed-off-by: Martin K. Petersen --- drivers/scsi/ibmvscsi/ibmvfc.c | 8 +++++--- drivers/scsi/ibmvscsi/ibmvfc.h | 7 ++++++- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c index 33dda4d32f65..3ad997ac3510 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.c +++ b/drivers/scsi/ibmvscsi/ibmvfc.c @@ -2756,16 +2756,18 @@ static void ibmvfc_handle_crq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost) ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_NONE); if (crq->format == IBMVFC_PARTITION_MIGRATED) { /* We need to re-setup the interpartition connection */ - dev_info(vhost->dev, "Re-enabling adapter\n"); + dev_info(vhost->dev, "Partition migrated, Re-enabling adapter\n"); vhost->client_migrated = 1; ibmvfc_purge_requests(vhost, DID_REQUEUE); ibmvfc_link_down(vhost, IBMVFC_LINK_DOWN); ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_REENABLE); - } else { - dev_err(vhost->dev, "Virtual adapter failed (rc=%d)\n", crq->format); + } else if (crq->format == IBMVFC_PARTNER_FAILED || crq->format == IBMVFC_PARTNER_DEREGISTER) { + dev_err(vhost->dev, "Host partner adapter deregistered or failed (rc=%d)\n", crq->format); ibmvfc_purge_requests(vhost, DID_ERROR); ibmvfc_link_down(vhost, IBMVFC_LINK_DOWN); ibmvfc_set_host_action(vhost, IBMVFC_HOST_ACTION_RESET); + } else { + dev_err(vhost->dev, "Received unknown transport event from partner (rc=%d)\n", crq->format); } return; case IBMVFC_CRQ_CMD_RSP: diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h index b81a53c4a9a8..459cc288ba1d 100644 --- a/drivers/scsi/ibmvscsi/ibmvfc.h +++ b/drivers/scsi/ibmvscsi/ibmvfc.h @@ -78,9 +78,14 @@ enum ibmvfc_crq_valid { IBMVFC_CRQ_XPORT_EVENT = 0xFF, }; -enum ibmvfc_crq_format { +enum ibmvfc_crq_init_msg { IBMVFC_CRQ_INIT = 0x01, IBMVFC_CRQ_INIT_COMPLETE = 0x02, +}; + +enum ibmvfc_crq_xport_evts { + IBMVFC_PARTNER_FAILED = 0x01, + IBMVFC_PARTNER_DEREGISTER = 0x02, IBMVFC_PARTITION_MIGRATED = 0x06, }; -- cgit From a595ecdd5f60b2d93863cebb07eec7f935839b54 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 27 Mar 2019 10:11:14 +0900 Subject: USB: serial: cp210x: add new device id Lorenz Messtechnik has a device that is controlled by the cp210x driver, so add the device id to the driver. The device id was provided by Silicon-Labs for the devices from this vendor. Reported-by: Uli Signed-off-by: Greg Kroah-Hartman Cc: stable Signed-off-by: Johan Hovold --- drivers/usb/serial/cp210x.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c index fffe23ab0189..979bef9bfb6b 100644 --- a/drivers/usb/serial/cp210x.c +++ b/drivers/usb/serial/cp210x.c @@ -80,6 +80,7 @@ static const struct usb_device_id id_table[] = { { USB_DEVICE(0x10C4, 0x804E) }, /* Software Bisque Paramount ME build-in converter */ { USB_DEVICE(0x10C4, 0x8053) }, /* Enfora EDG1228 */ { USB_DEVICE(0x10C4, 0x8054) }, /* Enfora GSM2228 */ + { USB_DEVICE(0x10C4, 0x8056) }, /* Lorenz Messtechnik devices */ { USB_DEVICE(0x10C4, 0x8066) }, /* Argussoft In-System Programmer */ { USB_DEVICE(0x10C4, 0x806F) }, /* IMS USB to RS422 Converter Cable */ { USB_DEVICE(0x10C4, 0x807A) }, /* Crumb128 board */ -- cgit From 84f3b43f7378b98b7e3096d5499de75183d4347c Mon Sep 17 00:00:00 2001 From: Bjørn Mork Date: Wed, 27 Mar 2019 15:25:32 +0100 Subject: USB: serial: option: add Olicard 600 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a Qualcomm based device with a QMI function on interface 4. It is mode switched from 2020:2030 using a standard eject message. T: Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 6 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=00 MxPS=64 #Cfgs= 1 P: Vendor=2020 ProdID=2031 Rev= 2.32 S: Manufacturer=Mobile Connect S: Product=Mobile Connect S: SerialNumber=0123456789ABCDEF C:* #Ifs= 6 Cfg#= 1 Atr=80 MxPwr=500mA I:* If#= 0 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 1 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=83(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=85(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=(none) E: Ad=87(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none) E: Ad=89(I) Atr=03(Int.) MxPS= 8 Ivl=32ms E: Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=08(stor.) Sub=06 Prot=50 Driver=(none) E: Ad=8a(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=06(O) Atr=02(Bulk) MxPS= 512 Ivl=125us Cc: stable@vger.kernel.org Signed-off-by: Bjørn Mork [ johan: use tabs to align comments in adjacent lines ] Signed-off-by: Johan Hovold --- drivers/usb/serial/option.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c index db5ece767d59..83869065b802 100644 --- a/drivers/usb/serial/option.c +++ b/drivers/usb/serial/option.c @@ -1945,10 +1945,12 @@ static const struct usb_device_id option_ids[] = { .driver_info = RSVD(4) }, { USB_DEVICE_INTERFACE_CLASS(0x2001, 0x7e35, 0xff), /* D-Link DWM-222 */ .driver_info = RSVD(4) }, - { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ - { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ - { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */ - { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) }, /* OLICARD300 - MT6225 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e01, 0xff, 0xff, 0xff) }, /* D-Link DWM-152/C1 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x3e02, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/C1 */ + { USB_DEVICE_AND_INTERFACE_INFO(0x07d1, 0x7e11, 0xff, 0xff, 0xff) }, /* D-Link DWM-156/A3 */ + { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x2031, 0xff), /* Olicard 600 */ + .driver_info = RSVD(4) }, + { USB_DEVICE_INTERFACE_CLASS(0x2020, 0x4000, 0xff) }, /* OLICARD300 - MT6225 */ { USB_DEVICE(INOVIA_VENDOR_ID, INOVIA_SEW858) }, { USB_DEVICE(VIATELECOM_VENDOR_ID, VIATELECOM_PRODUCT_CDS7) }, { USB_DEVICE_AND_INTERFACE_INFO(WETELECOM_VENDOR_ID, WETELECOM_PRODUCT_WMD200, 0xff, 0xff, 0xff) }, -- cgit From b6ffdf27f3d4f1e9af56effe6f86989170d71e95 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 18 Mar 2019 15:50:27 +0100 Subject: s390/cpumf: Fix warning from check_processor_id Function __hw_perf_event_init() used a CPU variable without ensuring CPU preemption has been disabled. This caused the following warning in the kernel log: [ 7.277085] BUG: using smp_processor_id() in preemptible [00000000] code: cf-csdiag/1892 [ 7.277111] caller is cf_diag_event_init+0x13a/0x338 [ 7.277122] CPU: 10 PID: 1892 Comm: cf-csdiag Not tainted 5.0.0-20190318.rc0.git0.9e1a11e0f602.300.fc29.s390x+debug #1 [ 7.277131] Hardware name: IBM 2964 NC9 712 (LPAR) [ 7.277139] Call Trace: [ 7.277150] ([<000000000011385a>] show_stack+0x82/0xd0) [ 7.277161] [<0000000000b7a71a>] dump_stack+0x92/0xd0 [ 7.277174] [<00000000007b7e9c>] check_preemption_disabled+0xe4/0x100 [ 7.277183] [<00000000001228aa>] cf_diag_event_init+0x13a/0x338 [ 7.277195] [<00000000002cf3aa>] perf_try_init_event+0x72/0xf0 [ 7.277204] [<00000000002d0bba>] perf_event_alloc+0x6fa/0xce0 [ 7.277214] [<00000000002dc4a8>] __s390x_sys_perf_event_open+0x398/0xd50 [ 7.277224] [<0000000000b9e8f0>] system_call+0xdc/0x2d8 [ 7.277233] 2 locks held by cf-csdiag/1892: [ 7.277241] #0: 00000000976f5510 (&sig->cred_guard_mutex){+.+.}, at: __s390x_sys_perf_event_open+0xd2e/0xd50 [ 7.277257] #1: 00000000363b11bd (&pmus_srcu){....}, at: perf_event_alloc+0x52e/0xce0 The variable is now accessed in proper context. Use get_cpu_var()/put_cpu_var() pair to disable preemption during access. As the hardware authorization settings apply to all CPUs, it does not matter which CPU is used to check the authorization setting. Remove the event->count assignment. It is not needed as function perf_event_alloc() allocates memory for the event with kzalloc() and thus count is already set to zero. Fixes: fe5908bccc56 ("s390/cpum_cf_diag: Add support for s390 counter facility diagnostic trace") Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_cf_diag.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c index c6fad208c2fa..b6854812d2ed 100644 --- a/arch/s390/kernel/perf_cpum_cf_diag.c +++ b/arch/s390/kernel/perf_cpum_cf_diag.c @@ -196,23 +196,30 @@ static void cf_diag_perf_event_destroy(struct perf_event *event) */ static int __hw_perf_event_init(struct perf_event *event) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); struct perf_event_attr *attr = &event->attr; + struct cpu_cf_events *cpuhw; enum cpumf_ctr_set i; int err = 0; - debug_sprintf_event(cf_diag_dbg, 5, - "%s event %p cpu %d authorized %#x\n", __func__, - event, event->cpu, cpuhw->info.auth_ctl); + debug_sprintf_event(cf_diag_dbg, 5, "%s event %p cpu %d\n", __func__, + event, event->cpu); event->hw.config = attr->config; event->hw.config_base = 0; - local64_set(&event->count, 0); - /* Add all authorized counter sets to config_base */ + /* Add all authorized counter sets to config_base. The + * the hardware init function is either called per-cpu or just once + * for all CPUS (event->cpu == -1). This depends on the whether + * counting is started for all CPUs or on a per workload base where + * the perf event moves from one CPU to another CPU. + * Checking the authorization on any CPU is fine as the hardware + * applies the same authorization settings to all CPUs. + */ + cpuhw = &get_cpu_var(cpu_cf_events); for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i]) event->hw.config_base |= cpumf_ctr_ctl[i]; + put_cpu_var(cpu_cf_events); /* No authorized counter sets, nothing to count/sample */ if (!event->hw.config_base) { -- cgit From 31d4c528cea4023cf36f6148c03bb960cedefeef Mon Sep 17 00:00:00 2001 From: Vincent Stehlé Date: Wed, 27 Mar 2019 23:06:42 +0100 Subject: cpufreq: scpi: Fix use after free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Free the priv structure only after we are done using it. Fixes: 1690d8bb91e370ab ("cpufreq: scpi/scmi: Fix freeing of dynamic OPPs") Signed-off-by: Vincent Stehlé Cc: 4.20+ # 4.20+ Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/scpi-cpufreq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c index 3f49427766b8..2b51e0718c9f 100644 --- a/drivers/cpufreq/scpi-cpufreq.c +++ b/drivers/cpufreq/scpi-cpufreq.c @@ -189,8 +189,8 @@ static int scpi_cpufreq_exit(struct cpufreq_policy *policy) clk_put(priv->clk); dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table); - kfree(priv); dev_pm_opp_remove_all_dynamic(priv->cpu_dev); + kfree(priv); return 0; } -- cgit From 056d28d135bca0b1d0908990338e00e9dadaf057 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Tue, 26 Mar 2019 12:48:39 -0500 Subject: objtool: Query pkg-config for libelf location If it is not in the default location, compilation fails at several points. Signed-off-by: Rolf Eike Beer Signed-off-by: Josh Poimboeuf Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/91a25e992566a7968fedc89ec80e7f4c83ad0548.1553622500.git.jpoimboe@redhat.com --- Makefile | 4 +++- tools/objtool/Makefile | 7 +++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index c0a34064c574..eef3d3f87c3b 100644 --- a/Makefile +++ b/Makefile @@ -950,9 +950,11 @@ mod_sign_cmd = true endif export mod_sign_cmd +HOST_LIBELF_LIBS = $(shell pkg-config libelf --libs 2>/dev/null || echo -lelf) + ifdef CONFIG_STACK_VALIDATION has_libelf := $(call try-run,\ - echo "int main() {}" | $(HOSTCC) -xc -o /dev/null -lelf -,1,0) + echo "int main() {}" | $(HOSTCC) -xc -o /dev/null $(HOST_LIBELF_LIBS) -,1,0) ifeq ($(has_libelf),1) objtool_target := tools/objtool FORCE else diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index c9d038f91af6..53f8be0f4a1f 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -25,14 +25,17 @@ LIBSUBCMD = $(LIBSUBCMD_OUTPUT)libsubcmd.a OBJTOOL := $(OUTPUT)objtool OBJTOOL_IN := $(OBJTOOL)-in.o +LIBELF_FLAGS := $(shell pkg-config libelf --cflags 2>/dev/null) +LIBELF_LIBS := $(shell pkg-config libelf --libs 2>/dev/null || echo -lelf) + all: $(OBJTOOL) INCLUDES := -I$(srctree)/tools/include \ -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \ -I$(srctree)/tools/objtool/arch/$(ARCH)/include WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -CFLAGS += -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) -LDFLAGS += -lelf $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS) +CFLAGS += -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS) +LDFLAGS += $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS) # Allow old libelf to be used: elfshdr := $(shell echo '$(pound)include ' | $(CC) $(CFLAGS) -x c -E - | grep elf_getshdr) -- cgit From 7dd47617114921fdd8c095509e5e7b4373cc44a1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 26 Mar 2019 22:51:02 +0100 Subject: watchdog: Respect watchdog cpumask on CPU hotplug The rework of the watchdog core to use cpu_stop_work broke the watchdog cpumask on CPU hotplug. The watchdog_enable/disable() functions are now called unconditionally from the hotplug callback, i.e. even on CPUs which are not in the watchdog cpumask. As a consequence the watchdog can become unstoppable. Only invoke them when the plugged CPU is in the watchdog cpumask. Fixes: 9cf57731b63e ("watchdog/softlockup: Replace "watchdog/%u" threads with cpu_stop_work") Reported-by: Maxime Coquelin Signed-off-by: Thomas Gleixner Tested-by: Maxime Coquelin Cc: Peter Zijlstra Cc: Oleg Nesterov Cc: Michael Ellerman Cc: Nicholas Piggin Cc: Don Zickus Cc: Ricardo Neri Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1903262245490.1789@nanos.tec.linutronix.de --- kernel/watchdog.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 403c9bd90413..6a5787233113 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -554,13 +554,15 @@ static void softlockup_start_all(void) int lockup_detector_online_cpu(unsigned int cpu) { - watchdog_enable(cpu); + if (cpumask_test_cpu(cpu, &watchdog_allowed_mask)) + watchdog_enable(cpu); return 0; } int lockup_detector_offline_cpu(unsigned int cpu) { - watchdog_disable(cpu); + if (cpumask_test_cpu(cpu, &watchdog_allowed_mask)) + watchdog_disable(cpu); return 0; } -- cgit From 206b92353c839c0b27a0b9bec24195f93fd6cf7a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 26 Mar 2019 17:36:05 +0100 Subject: cpu/hotplug: Prevent crash when CPU bringup fails on CONFIG_HOTPLUG_CPU=n Tianyu reported a crash in a CPU hotplug teardown callback when booting a kernel which has CONFIG_HOTPLUG_CPU disabled with the 'nosmt' boot parameter. It turns out that the SMP=y CONFIG_HOTPLUG_CPU=n case has been broken forever in case that a bringup callback fails. Unfortunately this issue was not recognized when the CPU hotplug code was reworked, so the shortcoming just stayed in place. When a bringup callback fails, the CPU hotplug code rolls back the operation and takes the CPU offline. The 'nosmt' command line argument uses a bringup failure to abort the bringup of SMT sibling CPUs. This partial bringup is required due to the MCE misdesign on Intel CPUs. With CONFIG_HOTPLUG_CPU=y the rollback works perfectly fine, but CONFIG_HOTPLUG_CPU=n lacks essential mechanisms to exercise the low level teardown of a CPU including the synchronizations in various facilities like RCU, NOHZ and others. As a consequence the teardown callbacks which must be executed on the outgoing CPU within stop machine with interrupts disabled are executed on the control CPU in interrupt enabled and preemptible context causing the kernel to crash and burn. The pre state machine code has a different failure mode which is more subtle and resulting in a less obvious use after free crash because the control side frees resources which are still in use by the undead CPU. But this is not a x86 only problem. Any architecture which supports the SMP=y HOTPLUG_CPU=n combination suffers from the same issue. It's just less likely to be triggered because in 99.99999% of the cases all bringup callbacks succeed. The easy solution of making HOTPLUG_CPU mandatory for SMP is not working on all architectures as the following architectures have either no hotplug support at all or not all subarchitectures support it: alpha, arc, hexagon, openrisc, riscv, sparc (32bit), mips (partial). Crashing the kernel in such a situation is not an acceptable state either. Implement a minimal rollback variant by limiting the teardown to the point where all regular teardown callbacks have been invoked and leave the CPU in the 'dead' idle state. This has the following consequences: - the CPU is brought down to the point where the stop_machine takedown would happen. - the CPU stays there forever and is idle - The CPU is cleared in the CPU active mask, but not in the CPU online mask which is a legit state. - Interrupts are not forced away from the CPU - All facilities which only look at online mask would still see it, but that is the case during normal hotplug/unplug operations as well. It's just a (way) longer time frame. This will expose issues, which haven't been exposed before or only seldom, because now the normally transient state of being non active but online is a permanent state. In testing this exposed already an issue vs. work queues where the vmstat code schedules work on the almost dead CPU which ends up in an unbound workqueue and triggers 'preemtible context' warnings. This is not a problem of this change, it merily exposes an already existing issue. Still this is better than crashing fully without a chance to debug it. This is mainly thought as workaround for those architectures which do not support HOTPLUG_CPU. All others should enforce HOTPLUG_CPU for SMP. Fixes: 2e1a3483ce74 ("cpu/hotplug: Split out the state walk into functions") Reported-by: Tianyu Lan Signed-off-by: Thomas Gleixner Tested-by: Tianyu Lan Acked-by: Greg Kroah-Hartman Cc: Konrad Wilk Cc: Josh Poimboeuf Cc: Mukesh Ojha Cc: Peter Zijlstra Cc: Jiri Kosina Cc: Rik van Riel Cc: Andy Lutomirski Cc: Micheal Kelley Cc: "K. Y. Srinivasan" Cc: Linus Torvalds Cc: Borislav Petkov Cc: K. Y. Srinivasan Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190326163811.503390616@linutronix.de --- kernel/cpu.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/kernel/cpu.c b/kernel/cpu.c index 025f419d16f6..6754f3ecfd94 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -564,6 +564,20 @@ static void undo_cpu_up(unsigned int cpu, struct cpuhp_cpu_state *st) cpuhp_invoke_callback(cpu, st->state, false, NULL, NULL); } +static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st) +{ + if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) + return true; + /* + * When CPU hotplug is disabled, then taking the CPU down is not + * possible because takedown_cpu() and the architecture and + * subsystem specific mechanisms are not available. So the CPU + * which would be completely unplugged again needs to stay around + * in the current state. + */ + return st->state <= CPUHP_BRINGUP_CPU; +} + static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target) { @@ -574,8 +588,10 @@ static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st, st->state++; ret = cpuhp_invoke_callback(cpu, st->state, true, NULL, NULL); if (ret) { - st->target = prev_state; - undo_cpu_up(cpu, st); + if (can_rollback_cpu(st)) { + st->target = prev_state; + undo_cpu_up(cpu, st); + } break; } } -- cgit From bebd024e4815b1a170fcd21ead9c2222b23ce9e6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 26 Mar 2019 17:36:06 +0100 Subject: x86/smp: Enforce CONFIG_HOTPLUG_CPU when SMP=y The SMT disable 'nosmt' command line argument is not working properly when CONFIG_HOTPLUG_CPU is disabled. The teardown of the sibling CPUs which are required to be brought up due to the MCE issues, cannot work. The CPUs are then kept in a half dead state. As the 'nosmt' functionality has become popular due to the speculative hardware vulnerabilities, the half torn down state is not a proper solution to the problem. Enforce CONFIG_HOTPLUG_CPU=y when SMP is enabled so the full operation is possible. Reported-by: Tianyu Lan Signed-off-by: Thomas Gleixner Acked-by: Greg Kroah-Hartman Cc: Konrad Wilk Cc: Josh Poimboeuf Cc: Mukesh Ojha Cc: Peter Zijlstra Cc: Jiri Kosina Cc: Rik van Riel Cc: Andy Lutomirski Cc: Micheal Kelley Cc: "K. Y. Srinivasan" Cc: Linus Torvalds Cc: Borislav Petkov Cc: K. Y. Srinivasan Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20190326163811.598166056@linutronix.de --- arch/x86/Kconfig | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c1f9b3cf437c..5ad92419be19 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2217,14 +2217,8 @@ config RANDOMIZE_MEMORY_PHYSICAL_PADDING If unsure, leave at the default value. config HOTPLUG_CPU - bool "Support for hot-pluggable CPUs" + def_bool y depends on SMP - ---help--- - Say Y here to allow turning CPUs off and on. CPUs can be - controlled through /sys/devices/system/cpu. - ( Note: power management support will enable this option - automatically on SMP systems. ) - Say N if you want to disable CPU hotplug. config BOOTPARAM_HOTPLUG_CPU0 bool "Set default setting of cpu0_hotpluggable" -- cgit From a9d57ef15cbe327fe54416dd194ee0ea66ae53a4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 25 Mar 2019 14:56:20 +0100 Subject: x86/retpolines: Disable switch jump tables when retpolines are enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit ce02ef06fcf7 ("x86, retpolines: Raise limit for generating indirect calls from switch-case") raised the limit under retpolines to 20 switch cases where gcc would only then start to emit jump tables, and therefore effectively disabling the emission of slow indirect calls in this area. After this has been brought to attention to gcc folks [0], Martin Liska has then fixed gcc to align with clang by avoiding to generate switch jump tables entirely under retpolines. This is taking effect in gcc starting from stable version 8.4.0. Given kernel supports compilation with older versions of gcc where the fix is not being available or backported anymore, we need to keep the extra KBUILD_CFLAGS around for some time and generally set the -fno-jump-tables to align with what more recent gcc is doing automatically today. More than 20 switch cases are not expected to be fast-path critical, but it would still be good to align with gcc behavior for versions < 8.4.0 in order to have consistency across supported gcc versions. vmlinux size is slightly growing by 0.27% for older gcc. This flag is only set to work around affected gcc, no change for clang. [0] https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86952 Suggested-by: Martin Liska Signed-off-by: Daniel Borkmann Signed-off-by: Thomas Gleixner Cc: David Woodhouse Cc: Linus Torvalds Cc: Jesper Dangaard Brouer Cc: Björn Töpel Cc: Magnus Karlsson Cc: Alexei Starovoitov Cc: H.J. Lu Cc: Alexei Starovoitov Cc: David S. Miller Link: https://lkml.kernel.org/r/20190325135620.14882-1-daniel@iogearbox.net --- arch/x86/Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 2d8b9d8ca4f8..a587805c6687 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -219,8 +219,12 @@ ifdef CONFIG_RETPOLINE # Additionally, avoid generating expensive indirect jumps which # are subject to retpolines for small number of switch cases. # clang turns off jump table generation by default when under - # retpoline builds, however, gcc does not for x86. - KBUILD_CFLAGS += $(call cc-option,--param=case-values-threshold=20) + # retpoline builds, however, gcc does not for x86. This has + # only been fixed starting from gcc stable version 8.4.0 and + # onwards, but not for older ones. See gcc bug #86952. + ifndef CONFIG_CC_IS_CLANG + KBUILD_CFLAGS += $(call cc-option,-fno-jump-tables) + endif endif archscripts: scripts_basic -- cgit From 92c77f7c4d5dfaaf45b2ce19360e69977c264766 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Mon, 25 Mar 2019 17:18:17 -0700 Subject: x86/mm: Don't exceed the valid physical address space valid_phys_addr_range() is used to sanity check the physical address range of an operation, e.g., access to /dev/mem. It uses __pa(high_memory) internally. If memory is populated at the end of the physical address space, then __pa(high_memory) is outside of the physical address space because: high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; For the comparison in valid_phys_addr_range() this is not an issue, but if CONFIG_DEBUG_VIRTUAL is enabled, __pa() maps to __phys_addr(), which verifies that the resulting physical address is within the valid physical address space of the CPU. So in the case that memory is populated at the end of the physical address space, this is not true and triggers a VIRTUAL_BUG_ON(). Use __pa(high_memory - 1) to prevent the conversion from going beyond the end of valid physical addresses. Fixes: be62a3204406 ("x86/mm: Limit mmap() of /dev/mem to valid physical addresses") Signed-off-by: Ralph Campbell Signed-off-by: Thomas Gleixner Cc: Craig Bergstrom Cc: Linus Torvalds Cc: Boris Ostrovsky Cc: Fengguang Wu Cc: Greg Kroah-Hartman Cc: Hans Verkuil Cc: Mauro Carvalho Chehab Cc: Peter Zijlstra Cc: Sander Eikelenboom Cc: Sean Young Link: https://lkml.kernel.org/r/20190326001817.15413-2-rcampbell@nvidia.com --- arch/x86/mm/mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index db3165714521..dc726e07d8ba 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -230,7 +230,7 @@ bool mmap_address_hint_valid(unsigned long addr, unsigned long len) /* Can we access it for direct reading/writing? Must be RAM: */ int valid_phys_addr_range(phys_addr_t addr, size_t count) { - return addr + count <= __pa(high_memory); + return addr + count - 1 <= __pa(high_memory - 1); } /* Can we access it through mmap? Must be a valid physical address: */ -- cgit From 8324c3d518cfd69f2a17866b52c13bf56d3042d8 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Mon, 25 Mar 2019 08:02:05 +0000 Subject: KVM: arm/arm64: Comments cleanup in mmu.c Some comments in virt/kvm/arm/mmu.c are outdated. Update them to reflect the current state of the code. Signed-off-by: Zenghui Yu Reviewed-by: Suzuki K Poulose [maz: commit message tidy-up] Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index f9da2fad9bd6..27c958306449 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -102,8 +102,7 @@ static bool kvm_is_device_pfn(unsigned long pfn) * @addr: IPA * @pmd: pmd pointer for IPA * - * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. Marks all - * pages in the range dirty. + * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. */ static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd) { @@ -121,8 +120,7 @@ static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd) * @addr: IPA * @pud: pud pointer for IPA * - * Function clears a PUD entry, flushes addr 1st and 2nd stage TLBs. Marks all - * pages in the range dirty. + * Function clears a PUD entry, flushes addr 1st and 2nd stage TLBs. */ static void stage2_dissolve_pud(struct kvm *kvm, phys_addr_t addr, pud_t *pudp) { @@ -899,9 +897,8 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. * @kvm: The KVM struct pointer for the VM. * - * Allocates only the stage-2 HW PGD level table(s) (can support either full - * 40-bit input addresses or limited to 32-bit input addresses). Clears the - * allocated pages. + * Allocates only the stage-2 HW PGD level table(s) of size defined by + * stage2_pgd_size(kvm). * * Note we don't need locking here as this is only called when the VM is * created, which can only be done once. @@ -1478,13 +1475,11 @@ static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud, } /** - * stage2_wp_puds - write protect PGD range - * @pgd: pointer to pgd entry - * @addr: range start address - * @end: range end address - * - * Process PUD entries, for a huge PUD we cause a panic. - */ + * stage2_wp_puds - write protect PGD range + * @pgd: pointer to pgd entry + * @addr: range start address + * @end: range end address + */ static void stage2_wp_puds(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr, phys_addr_t end) { -- cgit From 93a64ee71d10abc5787de7d4a00027e2c6469c3b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 28 Mar 2019 14:29:27 +0100 Subject: MAINTAINERS: Remove deleted file from futex file pattern kernel/futex_compat.c was recently removed, but it's still in the MAINTAINERS file. Remove it there as well. Fixes: 04e7712f4460 ("y2038: futex: Move compat implementation into futex.c") Reported-by: Joe Perches Signed-off-by: Thomas Gleixner Cc: Arnd Bergmann --- MAINTAINERS | 1 - 1 file changed, 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3e5a5d263f29..f015ff786109 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6408,7 +6408,6 @@ L: linux-kernel@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core S: Maintained F: kernel/futex.c -F: kernel/futex_compat.c F: include/asm-generic/futex.h F: include/linux/futex.h F: include/uapi/linux/futex.h -- cgit From 26cdaac4793c49357d2c731f2190632cefb7efb1 Mon Sep 17 00:00:00 2001 From: José Roberto de Souza Date: Tue, 26 Mar 2019 16:02:23 -0700 Subject: drm/i915/icl: Fix VEBOX mismatch BUG_ON() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GT VEBOX DISABLE is only 4 bits wide but it was using a 8 bits wide mask, the remaning reserved bits is set to 0 causing 4 more nonexistent VEBOX engines being detected as enabled, triggering the BUG_ON() because of mismatch between vebox_mask and newly added VEBOX_MASK(). [ 64.081621] [drm:intel_device_info_init_mmio [i915]] vdbox enable: 0005, instances: 0005 [ 64.081763] [drm:intel_device_info_init_mmio [i915]] vebox enable: 00f1, instances: 0001 [ 64.081825] intel_device_info_init_mmio:925 GEM_BUG_ON(vebox_mask != ({ unsigned int first__ = (VECS0); unsigned int count__ = (2); ((&(dev_priv)->__info)->engine_mask & (((~0UL) - (1UL << (first__)) + 1) & (~0UL >> (64 - 1 - (first__ + count__ - 1))))) >> first__; })) [ 64.082047] ------------[ cut here ]------------ [ 64.082054] kernel BUG at drivers/gpu/drm/i915/intel_device_info.c:925! BSpec: 20680 Fixes: 26376a7e74d2 ("drm/i915/icl: Check for fused-off VDBOX and VEBOX instances") Cc: Chris Wilson Cc: Tvrtko Ursulin Cc: Oscar Mateo Signed-off-by: José Roberto de Souza Reviewed-by: Tvrtko Ursulin Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20190326230223.26336-1-jose.souza@intel.com (cherry picked from commit 547fcf9b1c608cf5c43c156a8773a94c6a38dc44) Signed-off-by: Jani Nikula --- drivers/gpu/drm/i915/i915_reg.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 684589acc368..047855dd8c6b 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -2863,7 +2863,7 @@ enum i915_power_well_id { #define GEN11_GT_VEBOX_VDBOX_DISABLE _MMIO(0x9140) #define GEN11_GT_VDBOX_DISABLE_MASK 0xff #define GEN11_GT_VEBOX_DISABLE_SHIFT 16 -#define GEN11_GT_VEBOX_DISABLE_MASK (0xff << GEN11_GT_VEBOX_DISABLE_SHIFT) +#define GEN11_GT_VEBOX_DISABLE_MASK (0x0f << GEN11_GT_VEBOX_DISABLE_SHIFT) #define GEN11_EU_DISABLE _MMIO(0x9134) #define GEN11_EU_DIS_MASK 0xFF -- cgit From dd08a8d9a66de4b54575c294a92630299f7e0fe7 Mon Sep 17 00:00:00 2001 From: raymond pang Date: Thu, 28 Mar 2019 12:19:25 +0000 Subject: libata: fix using DMA buffers on stack When CONFIG_VMAP_STACK=y, __pa() returns incorrect physical address for a stack virtual address. Stack DMA buffers must be avoided. Signed-off-by: raymond pang Signed-off-by: Jens Axboe --- drivers/ata/libata-zpodd.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/ata/libata-zpodd.c b/drivers/ata/libata-zpodd.c index b3ed8f9953a8..173e6f2dd9af 100644 --- a/drivers/ata/libata-zpodd.c +++ b/drivers/ata/libata-zpodd.c @@ -52,38 +52,52 @@ static int eject_tray(struct ata_device *dev) /* Per the spec, only slot type and drawer type ODD can be supported */ static enum odd_mech_type zpodd_get_mech_type(struct ata_device *dev) { - char buf[16]; + char *buf; unsigned int ret; - struct rm_feature_desc *desc = (void *)(buf + 8); + struct rm_feature_desc *desc; struct ata_taskfile tf; static const char cdb[] = { GPCMD_GET_CONFIGURATION, 2, /* only 1 feature descriptor requested */ 0, 3, /* 3, removable medium feature */ 0, 0, 0,/* reserved */ - 0, sizeof(buf), + 0, 16, 0, 0, 0, }; + buf = kzalloc(16, GFP_KERNEL); + if (!buf) + return ODD_MECH_TYPE_UNSUPPORTED; + desc = (void *)(buf + 8); + ata_tf_init(dev, &tf); tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; tf.command = ATA_CMD_PACKET; tf.protocol = ATAPI_PROT_PIO; - tf.lbam = sizeof(buf); + tf.lbam = 16; ret = ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE, - buf, sizeof(buf), 0); - if (ret) + buf, 16, 0); + if (ret) { + kfree(buf); return ODD_MECH_TYPE_UNSUPPORTED; + } - if (be16_to_cpu(desc->feature_code) != 3) + if (be16_to_cpu(desc->feature_code) != 3) { + kfree(buf); return ODD_MECH_TYPE_UNSUPPORTED; + } - if (desc->mech_type == 0 && desc->load == 0 && desc->eject == 1) + if (desc->mech_type == 0 && desc->load == 0 && desc->eject == 1) { + kfree(buf); return ODD_MECH_TYPE_SLOT; - else if (desc->mech_type == 1 && desc->load == 0 && desc->eject == 1) + } else if (desc->mech_type == 1 && desc->load == 0 && + desc->eject == 1) { + kfree(buf); return ODD_MECH_TYPE_DRAWER; - else + } else { + kfree(buf); return ODD_MECH_TYPE_UNSUPPORTED; + } } /* Test if ODD is zero power ready by sense code */ -- cgit From 7265f5b72640f43e558af80347c62e32d568371f Mon Sep 17 00:00:00 2001 From: Wen Yang Date: Sat, 23 Mar 2019 14:14:31 +0800 Subject: coccinelle: put_device: reduce false positives Don't complain about a return when this function returns "&pdev->dev". Fixes: da9cfb87a44d ("coccinelle: semantic code search for missing put_device()") Reported-by: Julia Lawall Signed-off-by: Wen Yang Acked-by: Julia Lawall Signed-off-by: Masahiro Yamada --- scripts/coccinelle/free/put_device.cocci | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/coccinelle/free/put_device.cocci b/scripts/coccinelle/free/put_device.cocci index 7395697e7f19..c9f071b0a0ab 100644 --- a/scripts/coccinelle/free/put_device.cocci +++ b/scripts/coccinelle/free/put_device.cocci @@ -32,6 +32,7 @@ if (id == NULL || ...) { ... return ...; } ( id | (T2)dev_get_drvdata(&id->dev) | (T3)platform_get_drvdata(id) +| &id->dev ); | return@p2 ...; ) -- cgit From 221cc2d27ddc49b3e06d4637db02bf78e70c573c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 26 Mar 2019 13:02:19 +0900 Subject: kbuild: skip parsing pre sub-make code for recursion When Make recurses to the top Makefile with sub-make-done unset, the code block surrounded by 'ifneq ($(sub-make-done),1) ... endif' is parsed multiple times. This happens for in-tree building of include/config/auto.conf, *-pkg, etc. with GNU Make 4.x. This is a slight regression by commit 688931a5ad4e ("kbuild: skip sub-make for in-tree build with GNU Make 4.x") in terms of performance since that code block contains one $(shell ...) invocation. Fix it by exporting the variable irrespective of sub-make being run. I renamed it because GNU Make cannot properly export variables containing hyphens. This is probably a bug of GNU Make, and the issue in Kbuild had already been reported by commit 2bfbe7881ee0 ("kbuild: Do not use hyphen in exported variable name"). Signed-off-by: Masahiro Yamada --- Makefile | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 6c0635f69982..3c788a65d652 100644 --- a/Makefile +++ b/Makefile @@ -31,7 +31,7 @@ _all: # descending is started. They are now explicitly listed as the # prepare rule. -ifneq ($(sub-make-done),1) +ifneq ($(sub_make_done),1) # Do not use make's built-in rules and variables # (this increases performance and avoids hard-to-debug behaviour) @@ -155,6 +155,8 @@ need-sub-make := 1 $(lastword $(MAKEFILE_LIST)): ; endif +export sub_make_done := 1 + ifeq ($(need-sub-make),1) PHONY += $(MAKECMDGOALS) sub-make @@ -164,12 +166,12 @@ $(filter-out _all sub-make $(CURDIR)/Makefile, $(MAKECMDGOALS)) _all: sub-make # Invoke a second make in the output directory, passing relevant variables sub-make: - $(Q)$(MAKE) sub-make-done=1 \ + $(Q)$(MAKE) \ $(if $(KBUILD_OUTPUT),-C $(KBUILD_OUTPUT) KBUILD_SRC=$(CURDIR)) \ -f $(CURDIR)/Makefile $(filter-out _all sub-make,$(MAKECMDGOALS)) endif # need-sub-make -endif # sub-make-done +endif # sub_make_done # We process the rest of the Makefile if this is the final invocation of make ifeq ($(need-sub-make),) -- cgit From 156e7cbb3ef50d6d58b7975d79802e4ffb024dc2 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 26 Mar 2019 13:26:58 +0900 Subject: kbuild: do not overwrite .gitignore in output directory Commit 3a51ff344204 ("kbuild: gitignore output directory") seemed to bother people who version-control output directories. Andre Przywara says: "Unfortunately this breaks my setup, because I keep a totally separate git repository in my build directories to track (various versions of) .config. So .gitignore there is carefully crafted to ignore most build artefacts, but not .config, for instance." Link: https://lkml.org/lkml/2019/3/22/1819 Reported-by: Andre Przywara Signed-off-by: Masahiro Yamada Tested-by: Andre Przywara Reviewed-by: Andre Przywara --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3c788a65d652..2810425541f4 100644 --- a/Makefile +++ b/Makefile @@ -499,7 +499,8 @@ outputmakefile: ifneq ($(KBUILD_SRC),) $(Q)ln -fsn $(srctree) source $(Q)$(CONFIG_SHELL) $(srctree)/scripts/mkmakefile $(srctree) - $(Q){ echo "# this is build directory, ignore it"; echo "*"; } > .gitignore + $(Q)test -e .gitignore || \ + { echo "# this is build directory, ignore it"; echo "*"; } > .gitignore endif ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),) -- cgit From 1a49b2fd8f58dd397043a17de9b3c421ccf8eda7 Mon Sep 17 00:00:00 2001 From: Joe Lawrence Date: Tue, 26 Mar 2019 10:50:28 -0400 Subject: kbuild: strip whitespace in cmd_record_mcount findstring CC_FLAGS_FTRACE may contain trailing whitespace that interferes with findstring. For example, commit 6977f95e63b9 ("powerpc: avoid -mno-sched-epilog on GCC 4.9 and newer") introduced a change such that on my ppc64le box, CC_FLAGS_FTRACE="-pg -mprofile-kernel ". (Note the trailing space.) When cmd_record_mcount is now invoked, findstring fails as the ftrace flags were found at very end of _c_flags, without the trailing space. _c_flags=" ... -pg -mprofile-kernel" CC_FLAGS_FTRACE="-pg -mprofile-kernel " ^ findstring is looking for this extra space Remove the redundant whitespaces from CC_FLAGS_FTRACE in cmd_record_mcount to avoid this problem. [masahiro.yamada: This issue only happens in the released versions of GNU Make. CC_FLAGS_FTRACE will not contain the trailing space if you use the latest GNU Make, which contains commit b90fabc8d6f3 ("* NEWS: Do not insert a space during '+=' if the value is empty.") ] Suggested-by: Masahiro Yamada (refactoring) Fixes: 6977f95e63b9 ("powerpc: avoid -mno-sched-epilog on GCC 4.9 and newer"). Signed-off-by: Joe Lawrence Acked-by: Steven Rostedt (VMware) Signed-off-by: Masahiro Yamada --- scripts/Makefile.build | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/scripts/Makefile.build b/scripts/Makefile.build index 2554a15ecf2b..76ca30cc4791 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -199,11 +199,8 @@ sub_cmd_record_mcount = perl $(srctree)/scripts/recordmcount.pl "$(ARCH)" \ "$(if $(part-of-module),1,0)" "$(@)"; recordmcount_source := $(srctree)/scripts/recordmcount.pl endif # BUILD_C_RECORDMCOUNT -cmd_record_mcount = \ - if [ "$(findstring $(CC_FLAGS_FTRACE),$(_c_flags))" = \ - "$(CC_FLAGS_FTRACE)" ]; then \ - $(sub_cmd_record_mcount) \ - fi +cmd_record_mcount = $(if $(findstring $(strip $(CC_FLAGS_FTRACE)),$(_c_flags)), \ + $(sub_cmd_record_mcount)) endif # CC_USING_RECORD_MCOUNT endif # CONFIG_FTRACE_MCOUNT_RECORD -- cgit From 7fcddf7c004140052d6f72273a0455239ad49112 Mon Sep 17 00:00:00 2001 From: Michael Stefaniuc Date: Tue, 26 Mar 2019 22:22:00 +0100 Subject: scripts: coccinelle: Fix description of badty.cocci Summary was copy and pasted from array_size.cocci. Signed-off-by: Michael Stefaniuc Acked-by: Julia Lawall Signed-off-by: Masahiro Yamada --- scripts/coccinelle/misc/badty.cocci | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/coccinelle/misc/badty.cocci b/scripts/coccinelle/misc/badty.cocci index 481cf301ccfc..08470362199c 100644 --- a/scripts/coccinelle/misc/badty.cocci +++ b/scripts/coccinelle/misc/badty.cocci @@ -1,4 +1,4 @@ -/// Use ARRAY_SIZE instead of dividing sizeof array with sizeof an element +/// Correct the size argument to alloc functions /// //# This makes an effort to find cases where the argument to sizeof is wrong //# in memory allocation functions by checking the type of the allocated memory -- cgit From 54a7151b1496cddbb7a83546b7998103e98edc88 Mon Sep 17 00:00:00 2001 From: Fredrik Noring Date: Wed, 27 Mar 2019 19:12:50 +0100 Subject: kbuild: modversions: Fix relative CRC byte order interpretation Fix commit 56067812d5b0 ("kbuild: modversions: add infrastructure for emitting relative CRCs") where CRCs are interpreted in host byte order rather than proper kernel byte order. The bug is conditional on CONFIG_MODULE_REL_CRCS. For example, when loading a BE module into a BE kernel compiled with a LE system, the error "disagrees about version of symbol module_layout" is produced. A message such as "Found checksum D7FA6856 vs module 5668FAD7" will be given with debug enabled, which indicates an obvious endian problem within __kcrctab within the kernel image. The general solution is to use the macro TO_NATIVE, as is done in similar cases throughout modpost.c. With this correction it has been verified that a BE kernel compiled with a LE system accepts BE modules. This change has also been verified with a LE kernel compiled with a LE system, in which case TO_NATIVE returns its value unmodified since the byte orders match. This is by far the common case. Fixes: 56067812d5b0 ("kbuild: modversions: add infrastructure for emitting relative CRCs") Signed-off-by: Fredrik Noring Cc: stable@vger.kernel.org Signed-off-by: Masahiro Yamada --- scripts/mod/modpost.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 0b0d1080b1c5..f277e116e0eb 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -639,7 +639,7 @@ static void handle_modversions(struct module *mod, struct elf_info *info, info->sechdrs[sym->st_shndx].sh_offset - (info->hdr->e_type != ET_REL ? info->sechdrs[sym->st_shndx].sh_addr : 0); - crc = *crcp; + crc = TO_NATIVE(*crcp); } sym_update_crc(symname + strlen("__crc_"), mod, crc, export); -- cgit From 7d6ab823d6461e60d211d4c8d89a13dce08b730d Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 27 Mar 2019 22:53:31 +0000 Subject: vfs: Update mount API docs Update the mount API docs to reflect recent changes to the code. Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- Documentation/filesystems/mount_api.txt | 367 +++++++++++++++++--------------- 1 file changed, 195 insertions(+), 172 deletions(-) diff --git a/Documentation/filesystems/mount_api.txt b/Documentation/filesystems/mount_api.txt index 944d1965e917..00ff0cfccfa7 100644 --- a/Documentation/filesystems/mount_api.txt +++ b/Documentation/filesystems/mount_api.txt @@ -12,11 +12,13 @@ CONTENTS (4) Filesystem context security. - (5) VFS filesystem context operations. + (5) VFS filesystem context API. - (6) Parameter description. + (6) Superblock creation helpers. - (7) Parameter helper functions. + (7) Parameter description. + + (8) Parameter helper functions. ======== @@ -41,12 +43,15 @@ The creation of new mounts is now to be done in a multistep process: (7) Destroy the context. -To support this, the file_system_type struct gains a new field: +To support this, the file_system_type struct gains two new fields: int (*init_fs_context)(struct fs_context *fc); + const struct fs_parameter_description *parameters; -which is invoked to set up the filesystem-specific parts of a filesystem -context, including the additional space. +The first is invoked to set up the filesystem-specific parts of a filesystem +context, including the additional space, and the second points to the +parameter description for validation at registration time and querying by a +future system call. Note that security initialisation is done *after* the filesystem is called so that the namespaces may be adjusted first. @@ -73,9 +78,9 @@ context. This is represented by the fs_context structure: void *s_fs_info; unsigned int sb_flags; unsigned int sb_flags_mask; + unsigned int s_iflags; + unsigned int lsm_flags; enum fs_context_purpose purpose:8; - bool sloppy:1; - bool silent:1; ... }; @@ -141,6 +146,10 @@ The fs_context fields are as follows: Which bits SB_* flags are to be set/cleared in super_block::s_flags. + (*) unsigned int s_iflags + + These will be bitwise-OR'd with s->s_iflags when a superblock is created. + (*) enum fs_context_purpose This indicates the purpose for which the context is intended. The @@ -150,17 +159,6 @@ The fs_context fields are as follows: FS_CONTEXT_FOR_SUBMOUNT -- New automatic submount of extant mount FS_CONTEXT_FOR_RECONFIGURE -- Change an existing mount - (*) bool sloppy - (*) bool silent - - These are set if the sloppy or silent mount options are given. - - [NOTE] sloppy is probably unnecessary when userspace passes over one - option at a time since the error can just be ignored if userspace deems it - to be unimportant. - - [NOTE] silent is probably redundant with sb_flags & SB_SILENT. - The mount context is created by calling vfs_new_fs_context() or vfs_dup_fs_context() and is destroyed with put_fs_context(). Note that the structure is not refcounted. @@ -342,28 +340,47 @@ number of operations used by the new mount code for this purpose: It should return 0 on success or a negative error code on failure. -================================= -VFS FILESYSTEM CONTEXT OPERATIONS -================================= +========================== +VFS FILESYSTEM CONTEXT API +========================== -There are four operations for creating a filesystem context and -one for destroying a context: +There are four operations for creating a filesystem context and one for +destroying a context: - (*) struct fs_context *vfs_new_fs_context(struct file_system_type *fs_type, - struct dentry *reference, - unsigned int sb_flags, - unsigned int sb_flags_mask, - enum fs_context_purpose purpose); + (*) struct fs_context *fs_context_for_mount( + struct file_system_type *fs_type, + unsigned int sb_flags); - Create a filesystem context for a given filesystem type and purpose. This - allocates the filesystem context, sets the superblock flags, initialises - the security and calls fs_type->init_fs_context() to initialise the - filesystem private data. + Allocate a filesystem context for the purpose of setting up a new mount, + whether that be with a new superblock or sharing an existing one. This + sets the superblock flags, initialises the security and calls + fs_type->init_fs_context() to initialise the filesystem private data. - reference can be NULL or it may indicate the root dentry of a superblock - that is going to be reconfigured (FS_CONTEXT_FOR_RECONFIGURE) or - the automount point that triggered a submount (FS_CONTEXT_FOR_SUBMOUNT). - This is provided as a source of namespace information. + fs_type specifies the filesystem type that will manage the context and + sb_flags presets the superblock flags stored therein. + + (*) struct fs_context *fs_context_for_reconfigure( + struct dentry *dentry, + unsigned int sb_flags, + unsigned int sb_flags_mask); + + Allocate a filesystem context for the purpose of reconfiguring an + existing superblock. dentry provides a reference to the superblock to be + configured. sb_flags and sb_flags_mask indicate which superblock flags + need changing and to what. + + (*) struct fs_context *fs_context_for_submount( + struct file_system_type *fs_type, + struct dentry *reference); + + Allocate a filesystem context for the purpose of creating a new mount for + an automount point or other derived superblock. fs_type specifies the + filesystem type that will manage the context and the reference dentry + supplies the parameters. Namespaces are propagated from the reference + dentry's superblock also. + + Note that it's not a requirement that the reference dentry be of the same + filesystem type as fs_type. (*) struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc); @@ -390,20 +407,6 @@ context pointer or a negative error code. For the remaining operations, if an error occurs, a negative error code will be returned. - (*) int vfs_get_tree(struct fs_context *fc); - - Get or create the mountable root and superblock, using the parameters in - the filesystem context to select/configure the superblock. This invokes - the ->validate() op and then the ->get_tree() op. - - [NOTE] ->validate() could perhaps be rolled into ->get_tree() and - ->reconfigure(). - - (*) struct vfsmount *vfs_create_mount(struct fs_context *fc); - - Create a mount given the parameters in the specified filesystem context. - Note that this does not attach the mount to anything. - (*) int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param); @@ -432,17 +435,80 @@ returned. clear the pointer, but then becomes responsible for disposing of the object. - (*) int vfs_parse_fs_string(struct fs_context *fc, char *key, + (*) int vfs_parse_fs_string(struct fs_context *fc, const char *key, const char *value, size_t v_size); - A wrapper around vfs_parse_fs_param() that just passes a constant string. + A wrapper around vfs_parse_fs_param() that copies the value string it is + passed. (*) int generic_parse_monolithic(struct fs_context *fc, void *data); Parse a sys_mount() data page, assuming the form to be a text list consisting of key[=val] options separated by commas. Each item in the list is passed to vfs_mount_option(). This is the default when the - ->parse_monolithic() operation is NULL. + ->parse_monolithic() method is NULL. + + (*) int vfs_get_tree(struct fs_context *fc); + + Get or create the mountable root and superblock, using the parameters in + the filesystem context to select/configure the superblock. This invokes + the ->get_tree() method. + + (*) struct vfsmount *vfs_create_mount(struct fs_context *fc); + + Create a mount given the parameters in the specified filesystem context. + Note that this does not attach the mount to anything. + + +=========================== +SUPERBLOCK CREATION HELPERS +=========================== + +A number of VFS helpers are available for use by filesystems for the creation +or looking up of superblocks. + + (*) struct super_block * + sget_fc(struct fs_context *fc, + int (*test)(struct super_block *sb, struct fs_context *fc), + int (*set)(struct super_block *sb, struct fs_context *fc)); + + This is the core routine. If test is non-NULL, it searches for an + existing superblock matching the criteria held in the fs_context, using + the test function to match them. If no match is found, a new superblock + is created and the set function is called to set it up. + + Prior to the set function being called, fc->s_fs_info will be transferred + to sb->s_fs_info - and fc->s_fs_info will be cleared if set returns + success (ie. 0). + +The following helpers all wrap sget_fc(): + + (*) int vfs_get_super(struct fs_context *fc, + enum vfs_get_super_keying keying, + int (*fill_super)(struct super_block *sb, + struct fs_context *fc)) + + This creates/looks up a deviceless superblock. The keying indicates how + many superblocks of this type may exist and in what manner they may be + shared: + + (1) vfs_get_single_super + + Only one such superblock may exist in the system. Any further + attempt to get a new superblock gets this one (and any parameter + differences are ignored). + + (2) vfs_get_keyed_super + + Multiple superblocks of this type may exist and they're keyed on + their s_fs_info pointer (for example this may refer to a + namespace). + + (3) vfs_get_independent_super + + Multiple independent superblocks of this type may exist. This + function never matches an existing one and always creates a new + one. ===================== @@ -454,35 +520,22 @@ There's a core description struct that links everything together: struct fs_parameter_description { const char name[16]; - u8 nr_params; - u8 nr_alt_keys; - u8 nr_enums; - bool ignore_unknown; - bool no_source; - const char *const *keys; - const struct constant_table *alt_keys; const struct fs_parameter_spec *specs; const struct fs_parameter_enum *enums; }; For example: - enum afs_param { + enum { Opt_autocell, Opt_bar, Opt_dyn, Opt_foo, Opt_source, - nr__afs_params }; static const struct fs_parameter_description afs_fs_parameters = { .name = "kAFS", - .nr_params = nr__afs_params, - .nr_alt_keys = ARRAY_SIZE(afs_param_alt_keys), - .nr_enums = ARRAY_SIZE(afs_param_enums), - .keys = afs_param_keys, - .alt_keys = afs_param_alt_keys, .specs = afs_param_specs, .enums = afs_param_enums, }; @@ -494,28 +547,24 @@ The members are as follows: The name to be used in error messages generated by the parse helper functions. - (2) u8 nr_params; - - The number of discrete parameter identifiers. This indicates the number - of elements in the ->types[] array and also limits the values that may be - used in the values that the ->keys[] array maps to. - - It is expected that, for example, two parameters that are related, say - "acl" and "noacl" with have the same ID, but will be flagged to indicate - that one is the inverse of the other. The value can then be picked out - from the parse result. + (2) const struct fs_parameter_specification *specs; - (3) const struct fs_parameter_specification *specs; + Table of parameter specifications, terminated with a null entry, where the + entries are of type: - Table of parameter specifications, where the entries are of type: - - struct fs_parameter_type { - enum fs_parameter_spec type:8; - u8 flags; + struct fs_parameter_spec { + const char *name; + u8 opt; + enum fs_parameter_type type:8; + unsigned short flags; }; - and the parameter identifier is the index to the array. 'type' indicates - the desired value type and must be one of: + The 'name' field is a string to match exactly to the parameter key (no + wildcards, patterns and no case-independence) and 'opt' is the value that + will be returned by the fs_parser() function in the case of a successful + match. + + The 'type' field indicates the desired value type and must be one of: TYPE NAME EXPECTED VALUE RESULT IN ======================= ======================= ===================== @@ -525,85 +574,65 @@ The members are as follows: fs_param_is_u32_octal 32-bit octal int result->uint_32 fs_param_is_u32_hex 32-bit hex int result->uint_32 fs_param_is_s32 32-bit signed int result->int_32 + fs_param_is_u64 64-bit unsigned int result->uint_64 fs_param_is_enum Enum value name result->uint_32 fs_param_is_string Arbitrary string param->string fs_param_is_blob Binary blob param->blob fs_param_is_blockdev Blockdev path * Needs lookup fs_param_is_path Path * Needs lookup - fs_param_is_fd File descriptor param->file - - And each parameter can be qualified with 'flags': - - fs_param_v_optional The value is optional - fs_param_neg_with_no If key name is prefixed with "no", it is false - fs_param_neg_with_empty If value is "", it is false - fs_param_deprecated The parameter is deprecated. - - For example: - - static const struct fs_parameter_spec afs_param_specs[nr__afs_params] = { - [Opt_autocell] = { fs_param_is flag }, - [Opt_bar] = { fs_param_is_enum }, - [Opt_dyn] = { fs_param_is flag }, - [Opt_foo] = { fs_param_is_bool, fs_param_neg_with_no }, - [Opt_source] = { fs_param_is_string }, - }; + fs_param_is_fd File descriptor result->int_32 Note that if the value is of fs_param_is_bool type, fs_parse() will try to match any string value against "0", "1", "no", "yes", "false", "true". - [!] NOTE that the table must be sorted according to primary key name so - that ->keys[] is also sorted. - - (4) const char *const *keys; - - Table of primary key names for the parameters. There must be one entry - per defined parameter. The table is optional if ->nr_params is 0. The - table is just an array of names e.g.: + Each parameter can also be qualified with 'flags': - static const char *const afs_param_keys[nr__afs_params] = { - [Opt_autocell] = "autocell", - [Opt_bar] = "bar", - [Opt_dyn] = "dyn", - [Opt_foo] = "foo", - [Opt_source] = "source", - }; - - [!] NOTE that the table must be sorted such that the table can be searched - with bsearch() using strcmp(). This means that the Opt_* values must - correspond to the entries in this table. - - (5) const struct constant_table *alt_keys; - u8 nr_alt_keys; - - Table of additional key names and their mappings to parameter ID plus the - number of elements in the table. This is optional. The table is just an - array of { name, integer } pairs, e.g.: + fs_param_v_optional The value is optional + fs_param_neg_with_no result->negated set if key is prefixed with "no" + fs_param_neg_with_empty result->negated set if value is "" + fs_param_deprecated The parameter is deprecated. - static const struct constant_table afs_param_keys[] = { - { "baz", Opt_bar }, - { "dynamic", Opt_dyn }, + These are wrapped with a number of convenience wrappers: + + MACRO SPECIFIES + ======================= =============================================== + fsparam_flag() fs_param_is_flag + fsparam_flag_no() fs_param_is_flag, fs_param_neg_with_no + fsparam_bool() fs_param_is_bool + fsparam_u32() fs_param_is_u32 + fsparam_u32oct() fs_param_is_u32_octal + fsparam_u32hex() fs_param_is_u32_hex + fsparam_s32() fs_param_is_s32 + fsparam_u64() fs_param_is_u64 + fsparam_enum() fs_param_is_enum + fsparam_string() fs_param_is_string + fsparam_blob() fs_param_is_blob + fsparam_bdev() fs_param_is_blockdev + fsparam_path() fs_param_is_path + fsparam_fd() fs_param_is_fd + + all of which take two arguments, name string and option number - for + example: + + static const struct fs_parameter_spec afs_param_specs[] = { + fsparam_flag ("autocell", Opt_autocell), + fsparam_flag ("dyn", Opt_dyn), + fsparam_string ("source", Opt_source), + fsparam_flag_no ("foo", Opt_foo), + {} }; - [!] NOTE that the table must be sorted such that strcmp() can be used with - bsearch() to search the entries. - - The parameter ID can also be fs_param_key_removed to indicate that a - deprecated parameter has been removed and that an error will be given. - This differs from fs_param_deprecated where the parameter may still have - an effect. - - Further, the behaviour of the parameter may differ when an alternate name - is used (for instance with NFS, "v3", "v4.2", etc. are alternate names). + An addition macro, __fsparam() is provided that takes an additional pair + of arguments to specify the type and the flags for anything that doesn't + match one of the above macros. (6) const struct fs_parameter_enum *enums; - u8 nr_enums; - Table of enum value names to integer mappings and the number of elements - stored therein. This is of type: + Table of enum value names to integer mappings, terminated with a null + entry. This is of type: struct fs_parameter_enum { - u8 param_id; + u8 opt; char name[14]; u8 value; }; @@ -621,11 +650,6 @@ The members are as follows: try to look the value up in the enum table and the result will be stored in the parse result. - (7) bool no_source; - - If this is set, fs_parse() will ignore any "source" parameter and not - pass it to the filesystem. - The parser should be pointed to by the parser pointer in the file_system_type struct as this will provide validation on registration (if CONFIG_VALIDATE_FS_PARSER=y) and will allow the description to be queried from @@ -650,9 +674,8 @@ process the parameters it is given. int value; }; - and it must be sorted such that it can be searched using bsearch() using - strcmp(). If a match is found, the corresponding value is returned. If a - match isn't found, the not_found value is returned instead. + If a match is found, the corresponding value is returned. If a match + isn't found, the not_found value is returned instead. (*) bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, @@ -665,36 +688,36 @@ process the parameters it is given. should just be set to lie inside the low-to-high range. If all is good, true is returned. If the table is invalid, errors are - logged to dmesg, the stack is dumped and false is returned. + logged to dmesg and false is returned. + + (*) bool fs_validate_description(const struct fs_parameter_description *desc); + + This performs some validation checks on a parameter description. It + returns true if the description is good and false if it is not. It will + log errors to dmesg if validation fails. (*) int fs_parse(struct fs_context *fc, - const struct fs_param_parser *parser, + const struct fs_parameter_description *desc, struct fs_parameter *param, - struct fs_param_parse_result *result); + struct fs_parse_result *result); This is the main interpreter of parameters. It uses the parameter - description (parser) to look up the name of the parameter to use and to - convert that to a parameter ID (stored in result->key). + description to look up a parameter by key name and to convert that to an + option number (which it returns). If successful, and if the parameter type indicates the result is a boolean, integer or enum type, the value is converted by this function and - the result stored in result->{boolean,int_32,uint_32}. + the result stored in result->{boolean,int_32,uint_32,uint_64}. If a match isn't initially made, the key is prefixed with "no" and no value is present then an attempt will be made to look up the key with the prefix removed. If this matches a parameter for which the type has flag - fs_param_neg_with_no set, then a match will be made and the value will be - set to false/0/NULL. - - If the parameter is successfully matched and, optionally, parsed - correctly, 1 is returned. If the parameter isn't matched and - parser->ignore_unknown is set, then 0 is returned. Otherwise -EINVAL is - returned. - - (*) bool fs_validate_description(const struct fs_parameter_description *desc); + fs_param_neg_with_no set, then a match will be made and result->negated + will be set to true. - This is validates the parameter description. It returns true if the - description is good and false if it is not. + If the parameter isn't matched, -ENOPARAM will be returned; if the + parameter is matched, but the value is erroneous, -EINVAL will be + returned; otherwise the parameter's option number will be returned. (*) int fs_lookup_param(struct fs_context *fc, struct fs_parameter *value, -- cgit From 8c7ae38d1ce12a0eaeba655df8562552b3596c7f Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 27 Mar 2019 22:48:02 +0000 Subject: afs: Fix StoreData op marshalling The marshalling of AFS.StoreData, AFS.StoreData64 and YFS.StoreData64 calls generated by ->setattr() ops for the purpose of expanding a file is incorrect due to older documentation incorrectly describing the way the RPC 'FileLength' parameter is meant to work. The older documentation says that this is the length the file is meant to end up at the end of the operation; however, it was never implemented this way in any of the servers, but rather the file is truncated down to this before the write operation is effected, and never expanded to it (and, indeed, it was renamed to 'TruncPos' in 2014). Fix this by setting the position parameter to the new file length and doing a zero-lengh write there. The bug causes Xwayland to SIGBUS due to unexpected non-expansion of a file it then mmaps. This can be tested by giving the following test program a filename in an AFS directory: #include #include #include #include #include int main(int argc, char *argv[]) { char *p; int fd; if (argc != 2) { fprintf(stderr, "Format: test-trunc-mmap \n"); exit(2); } fd = open(argv[1], O_RDWR | O_CREAT | O_TRUNC); if (fd < 0) { perror(argv[1]); exit(1); } if (ftruncate(fd, 0x140008) == -1) { perror("ftruncate"); exit(1); } p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (p == MAP_FAILED) { perror("mmap"); exit(1); } p[0] = 'a'; if (munmap(p, 4096) < 0) { perror("munmap"); exit(1); } if (close(fd) < 0) { perror("close"); exit(1); } exit(0); } Fixes: 31143d5d515e ("AFS: implement basic file write support") Reported-by: Jonathan Billings Tested-by: Jonathan Billings Signed-off-by: David Howells Signed-off-by: Linus Torvalds --- fs/afs/fsclient.c | 6 +++--- fs/afs/yfsclient.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index ca08c83168f5..0b37867b5c20 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -1515,8 +1515,8 @@ static int afs_fs_setattr_size64(struct afs_fs_cursor *fc, struct iattr *attr) xdr_encode_AFS_StoreStatus(&bp, attr); - *bp++ = 0; /* position of start of write */ - *bp++ = 0; + *bp++ = htonl(attr->ia_size >> 32); /* position of start of write */ + *bp++ = htonl((u32) attr->ia_size); *bp++ = 0; /* size of write */ *bp++ = 0; *bp++ = htonl(attr->ia_size >> 32); /* new file length */ @@ -1564,7 +1564,7 @@ static int afs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) xdr_encode_AFS_StoreStatus(&bp, attr); - *bp++ = 0; /* position of start of write */ + *bp++ = htonl(attr->ia_size); /* position of start of write */ *bp++ = 0; /* size of write */ *bp++ = htonl(attr->ia_size); /* new file length */ diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index 5aa57929e8c2..6e97a42d24d1 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -1514,7 +1514,7 @@ static int yfs_fs_setattr_size(struct afs_fs_cursor *fc, struct iattr *attr) bp = xdr_encode_u32(bp, 0); /* RPC flags */ bp = xdr_encode_YFSFid(bp, &vnode->fid); bp = xdr_encode_YFS_StoreStatus(bp, attr); - bp = xdr_encode_u64(bp, 0); /* position of start of write */ + bp = xdr_encode_u64(bp, attr->ia_size); /* position of start of write */ bp = xdr_encode_u64(bp, 0); /* size of write */ bp = xdr_encode_u64(bp, attr->ia_size); /* new file length */ yfs_check_req(call, bp); -- cgit From f6027c81099e87516d27d123867f10abd04b2d38 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Thu, 28 Mar 2019 16:49:48 +0100 Subject: x86/cpufeature: Fix __percpu annotation in this_cpu_has() &cpu_info.x86_capability is __percpu, and the second argument of x86_this_cpu_test_bit() is expected to be __percpu. Don't cast the __percpu away and then implicitly add it again. This gets rid of 106 lines of sparse warnings with the kernel config I'm using. Signed-off-by: Jann Horn Signed-off-by: Borislav Petkov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Masahiro Yamada Cc: Nadav Amit Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20190328154948.152273-1-jannh@google.com --- arch/x86/include/asm/cpufeature.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index ce95b8cbd229..0e56ff7e4848 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -112,8 +112,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; test_cpu_cap(c, bit)) #define this_cpu_has(bit) \ - (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ - x86_this_cpu_test_bit(bit, (unsigned long *)&cpu_info.x86_capability)) + (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ + x86_this_cpu_test_bit(bit, \ + (unsigned long __percpu *)&cpu_info.x86_capability)) /* * This macro is for detection of features which need kernel -- cgit From e5545c94e43b8f6599ffc01df8d1aedf18ee912a Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 25 Mar 2019 23:32:08 -0700 Subject: gpio: of: Check propname before applying "cs-gpios" quirks SPI GPIO device has more than just "cs-gpio" property in its node and would request those GPIOs as a part of its initialization. To avoid applying CS-specific quirk to all of them add a check to make sure that propname is "cs-gpios". Signed-off-by: Andrey Smirnov Cc: Linus Walleij Cc: Bartosz Golaszewski Cc: Chris Healy Cc: linux-gpio@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-of.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 8b9c3ab70f6e..ee7f08386a72 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -120,7 +120,8 @@ static void of_gpio_flags_quirks(struct device_node *np, * to determine if the flags should have inverted semantics. */ if (IS_ENABLED(CONFIG_SPI_MASTER) && - of_property_read_bool(np, "cs-gpios")) { + of_property_read_bool(np, "cs-gpios") && + !strcmp(propname, "cs-gpios")) { struct device_node *child; u32 cs; int ret; -- cgit From 7ce40277bf848391705011ba37eac2e377cbd9e6 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 25 Mar 2019 23:32:09 -0700 Subject: gpio: of: Check for "spi-cs-high" in child instead of parent node "spi-cs-high" is going to be specified in child node of an SPI controller's representing attached SPI device, so change the code to look for it there, instead of checking parent node. Signed-off-by: Andrey Smirnov Cc: Linus Walleij Cc: Bartosz Golaszewski Cc: Chris Healy Cc: linux-gpio@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-of.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index ee7f08386a72..0220dd6d64ed 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -143,16 +143,16 @@ static void of_gpio_flags_quirks(struct device_node *np, * conflict and the "spi-cs-high" flag will * take precedence. */ - if (of_property_read_bool(np, "spi-cs-high")) { + if (of_property_read_bool(child, "spi-cs-high")) { if (*flags & OF_GPIO_ACTIVE_LOW) { pr_warn("%s GPIO handle specifies active low - ignored\n", - of_node_full_name(np)); + of_node_full_name(child)); *flags &= ~OF_GPIO_ACTIVE_LOW; } } else { if (!(*flags & OF_GPIO_ACTIVE_LOW)) pr_info("%s enforce active low on chipselect handle\n", - of_node_full_name(np)); + of_node_full_name(child)); *flags |= OF_GPIO_ACTIVE_LOW; } break; -- cgit From 552c69b1dc714854a5f4e27d37a43c6d797adf7d Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 7 Mar 2019 15:27:43 -0800 Subject: KVM: nVMX: Do not inherit quadrant and invalid for the root shadow EPT Explicitly zero out quadrant and invalid instead of inheriting them from the root_mmu. Functionally, this patch is a nop as we (should) never set quadrant for a direct mapped (EPT) root_mmu and nested EPT is only allowed if EPT is used for L1, and the root_mmu will never be invalid at this point. Explicitly setting flags sets the stage for repurposing the legacy paging bits in role, e.g. nxe, cr0_wp, and sm{a,e}p_andnot_wp, at which point 'smm' would be the only flag to be inherited from root_mmu. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7837ab001d80..01bb090aaa5c 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4918,11 +4918,15 @@ static union kvm_mmu_role kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty, bool execonly) { - union kvm_mmu_role role; + union kvm_mmu_role role = {0}; + union kvm_mmu_page_role root_base = vcpu->arch.root_mmu.mmu_role.base; - /* Base role is inherited from root_mmu */ - role.base.word = vcpu->arch.root_mmu.mmu_role.base.word; - role.ext = kvm_calc_mmu_role_ext(vcpu); + /* Legacy paging and SMM flags are inherited from root_mmu */ + role.base.smm = root_base.smm; + role.base.nxe = root_base.nxe; + role.base.cr0_wp = root_base.cr0_wp; + role.base.smep_andnot_wp = root_base.smep_andnot_wp; + role.base.smap_andnot_wp = root_base.smap_andnot_wp; role.base.level = PT64_ROOT_4LEVEL; role.base.direct = false; @@ -4930,6 +4934,7 @@ kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty, role.base.guest_mode = true; role.base.access = ACC_ALL; + role.ext = kvm_calc_mmu_role_ext(vcpu); role.ext.execonly = execonly; return role; -- cgit From 47c42e6b4192a2ac8b6c9858ebcf400a9eff7a10 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 7 Mar 2019 15:27:44 -0800 Subject: KVM: x86: fix handling of role.cr4_pae and rename it to 'gpte_size' The cr4_pae flag is a bit of a misnomer, its purpose is really to track whether the guest PTE that is being shadowed is a 4-byte entry or an 8-byte entry. Prior to supporting nested EPT, the size of the gpte was reflected purely by CR4.PAE. KVM fudged things a bit for direct sptes, but it was mostly harmless since the size of the gpte never mattered. Now that a spte may be tracking an indirect EPT entry, relying on CR4.PAE is wrong and ill-named. For direct shadow pages, force the gpte_size to '1' as they are always 8-byte entries; EPT entries can only be 8-bytes and KVM always uses 8-byte entries for NPT and its identity map (when running with EPT but not unrestricted guest). Likewise, nested EPT entries are always 8-bytes. Nested EPT presents a unique scenario as the size of the entries are not dictated by CR4.PAE, but neither is the shadow page a direct map. To handle this scenario, set cr0_wp=1 and smap_andnot_wp=1, an otherwise impossible combination, to denote a nested EPT shadow page. Use the information to avoid incorrectly zapping an unsync'd indirect page in __kvm_sync_page(). Providing a consistent and accurate gpte_size fixes a bug reported by Vitaly where fast_cr3_switch() always fails when switching from L2 to L1 as kvm_mmu_get_page() would force role.cr4_pae=0 for direct pages, whereas kvm_calc_mmu_role_common() would set it according to CR4.PAE. Fixes: 7dcd575520082 ("x86/kvm/mmu: check if tdp/shadow MMU reconfiguration is needed") Reported-by: Vitaly Kuznetsov Tested-by: Vitaly Kuznetsov Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/mmu.txt | 11 +++++++---- arch/x86/include/asm/kvm_host.h | 4 ++-- arch/x86/kvm/mmu.c | 38 ++++++++++++++++++++++++-------------- arch/x86/kvm/mmutrace.h | 4 ++-- 4 files changed, 35 insertions(+), 22 deletions(-) diff --git a/Documentation/virtual/kvm/mmu.txt b/Documentation/virtual/kvm/mmu.txt index f365102c80f5..2efe0efc516e 100644 --- a/Documentation/virtual/kvm/mmu.txt +++ b/Documentation/virtual/kvm/mmu.txt @@ -142,7 +142,7 @@ Shadow pages contain the following information: If clear, this page corresponds to a guest page table denoted by the gfn field. role.quadrant: - When role.cr4_pae=0, the guest uses 32-bit gptes while the host uses 64-bit + When role.gpte_is_8_bytes=0, the guest uses 32-bit gptes while the host uses 64-bit sptes. That means a guest page table contains more ptes than the host, so multiple shadow pages are needed to shadow one guest page. For first-level shadow pages, role.quadrant can be 0 or 1 and denotes the @@ -158,9 +158,9 @@ Shadow pages contain the following information: The page is invalid and should not be used. It is a root page that is currently pinned (by a cpu hardware register pointing to it); once it is unpinned it will be destroyed. - role.cr4_pae: - Contains the value of cr4.pae for which the page is valid (e.g. whether - 32-bit or 64-bit gptes are in use). + role.gpte_is_8_bytes: + Reflects the size of the guest PTE for which the page is valid, i.e. '1' + if 64-bit gptes are in use, '0' if 32-bit gptes are in use. role.nxe: Contains the value of efer.nxe for which the page is valid. role.cr0_wp: @@ -173,6 +173,9 @@ Shadow pages contain the following information: Contains the value of cr4.smap && !cr0.wp for which the page is valid (pages for which this is true are different from other pages; see the treatment of cr0.wp=0 below). + role.ept_sp: + This is a virtual flag to denote a shadowed nested EPT page. ept_sp + is true if "cr0_wp && smap_andnot_wp", an otherwise invalid combination. role.smm: Is 1 if the page is valid in system management mode. This field determines which of the kvm_memslots array was used to build this diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a5db4475e72d..88f5192ce05e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -253,14 +253,14 @@ struct kvm_mmu_memory_cache { * kvm_memory_slot.arch.gfn_track which is 16 bits, so the role bits used * by indirect shadow page can not be more than 15 bits. * - * Currently, we used 14 bits that are @level, @cr4_pae, @quadrant, @access, + * Currently, we used 14 bits that are @level, @gpte_is_8_bytes, @quadrant, @access, * @nxe, @cr0_wp, @smep_andnot_wp and @smap_andnot_wp. */ union kvm_mmu_page_role { u32 word; struct { unsigned level:4; - unsigned cr4_pae:1; + unsigned gpte_is_8_bytes:1; unsigned quadrant:2; unsigned direct:1; unsigned access:3; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 01bb090aaa5c..776a58b00682 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -182,7 +182,7 @@ struct kvm_shadow_walk_iterator { static const union kvm_mmu_page_role mmu_base_role_mask = { .cr0_wp = 1, - .cr4_pae = 1, + .gpte_is_8_bytes = 1, .nxe = 1, .smep_andnot_wp = 1, .smap_andnot_wp = 1, @@ -2205,6 +2205,7 @@ static bool kvm_mmu_prepare_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp, static void kvm_mmu_commit_zap_page(struct kvm *kvm, struct list_head *invalid_list); + #define for_each_valid_sp(_kvm, _sp, _gfn) \ hlist_for_each_entry(_sp, \ &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ @@ -2215,12 +2216,17 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, for_each_valid_sp(_kvm, _sp, _gfn) \ if ((_sp)->gfn != (_gfn) || (_sp)->role.direct) {} else +static inline bool is_ept_sp(struct kvm_mmu_page *sp) +{ + return sp->role.cr0_wp && sp->role.smap_andnot_wp; +} + /* @sp->gfn should be write-protected at the call site */ static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, struct list_head *invalid_list) { - if (sp->role.cr4_pae != !!is_pae(vcpu) - || vcpu->arch.mmu->sync_page(vcpu, sp) == 0) { + if ((!is_ept_sp(sp) && sp->role.gpte_is_8_bytes != !!is_pae(vcpu)) || + vcpu->arch.mmu->sync_page(vcpu, sp) == 0) { kvm_mmu_prepare_zap_page(vcpu->kvm, sp, invalid_list); return false; } @@ -2423,7 +2429,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, role.level = level; role.direct = direct; if (role.direct) - role.cr4_pae = 0; + role.gpte_is_8_bytes = true; role.access = access; if (!vcpu->arch.mmu->direct_map && vcpu->arch.mmu->root_level <= PT32_ROOT_LEVEL) { @@ -4794,7 +4800,6 @@ static union kvm_mmu_role kvm_calc_mmu_role_common(struct kvm_vcpu *vcpu, role.base.access = ACC_ALL; role.base.nxe = !!is_nx(vcpu); - role.base.cr4_pae = !!is_pae(vcpu); role.base.cr0_wp = is_write_protection(vcpu); role.base.smm = is_smm(vcpu); role.base.guest_mode = is_guest_mode(vcpu); @@ -4815,6 +4820,7 @@ kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only) role.base.ad_disabled = (shadow_accessed_mask == 0); role.base.level = kvm_x86_ops->get_tdp_level(vcpu); role.base.direct = true; + role.base.gpte_is_8_bytes = true; return role; } @@ -4879,6 +4885,7 @@ kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only) role.base.smap_andnot_wp = role.ext.cr4_smap && !is_write_protection(vcpu); role.base.direct = !is_paging(vcpu); + role.base.gpte_is_8_bytes = !!is_pae(vcpu); if (!is_long_mode(vcpu)) role.base.level = PT32E_ROOT_LEVEL; @@ -4919,21 +4926,24 @@ kvm_calc_shadow_ept_root_page_role(struct kvm_vcpu *vcpu, bool accessed_dirty, bool execonly) { union kvm_mmu_role role = {0}; - union kvm_mmu_page_role root_base = vcpu->arch.root_mmu.mmu_role.base; - /* Legacy paging and SMM flags are inherited from root_mmu */ - role.base.smm = root_base.smm; - role.base.nxe = root_base.nxe; - role.base.cr0_wp = root_base.cr0_wp; - role.base.smep_andnot_wp = root_base.smep_andnot_wp; - role.base.smap_andnot_wp = root_base.smap_andnot_wp; + /* SMM flag is inherited from root_mmu */ + role.base.smm = vcpu->arch.root_mmu.mmu_role.base.smm; role.base.level = PT64_ROOT_4LEVEL; + role.base.gpte_is_8_bytes = true; role.base.direct = false; role.base.ad_disabled = !accessed_dirty; role.base.guest_mode = true; role.base.access = ACC_ALL; + /* + * WP=1 and NOT_WP=1 is an impossible combination, use WP and the + * SMAP variation to denote shadow EPT entries. + */ + role.base.cr0_wp = true; + role.base.smap_andnot_wp = true; + role.ext = kvm_calc_mmu_role_ext(vcpu); role.ext.execonly = execonly; @@ -5184,7 +5194,7 @@ static bool detect_write_misaligned(struct kvm_mmu_page *sp, gpa_t gpa, gpa, bytes, sp->role.word); offset = offset_in_page(gpa); - pte_size = sp->role.cr4_pae ? 8 : 4; + pte_size = sp->role.gpte_is_8_bytes ? 8 : 4; /* * Sometimes, the OS only writes the last one bytes to update status @@ -5208,7 +5218,7 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte) page_offset = offset_in_page(gpa); level = sp->role.level; *nspte = 1; - if (!sp->role.cr4_pae) { + if (!sp->role.gpte_is_8_bytes) { page_offset <<= 1; /* 32->64 */ /* * A 32-bit pde maps 4MB while the shadow pdes map diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 9f6c855a0043..dd30dccd2ad5 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -29,10 +29,10 @@ \ role.word = __entry->role; \ \ - trace_seq_printf(p, "sp gfn %llx l%u%s q%u%s %s%s" \ + trace_seq_printf(p, "sp gfn %llx l%u %u-byte q%u%s %s%s" \ " %snxe %sad root %u %s%c", \ __entry->gfn, role.level, \ - role.cr4_pae ? " pae" : "", \ + role.gpte_is_8_bytes ? 8 : 4, \ role.quadrant, \ role.direct ? " direct" : "", \ access_str[role.access], \ -- cgit From 5e124900c6ebf5dfbe31b7b67073a64b2da14967 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 15 Feb 2019 12:48:38 -0800 Subject: KVM: doc: Fix incorrect word ordering regarding supported use of APIs Per Paolo[1], instantiating multiple VMs in a single process is legal; but this conflicts with KVM's API documentation, which states: The only supported use is one virtual machine per process, and one vcpu per thread. However, an earlier section in the documentation states: Only run VM ioctls from the same process (address space) that was used to create the VM. and: Only run vcpu ioctls from the same thread that was used to create the vcpu. This suggests that the conflicting documentation is simply an incorrect ordering of of words, i.e. what's really meant is that a virtual machine can't be shared across multiple processes and a vCPU can't be shared across multiple threads. Tweak the blurb on issuing ioctls to use a more assertive tone, and rewrite the "supported use" sentence to reference said blurb instead of poorly restating it in different terms. Opportunistically add missing punctuation. [1] https://lkml.kernel.org/r/f23265d4-528e-3bd4-011f-4d7b8f3281db@redhat.com Fixes: 9c1b96e34717 ("KVM: Document basic API") Signed-off-by: Sean Christopherson [Improve notes on asynchronous ioctl] Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 7de9eee73fcd..158805532083 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -5,24 +5,26 @@ The Definitive KVM (Kernel-based Virtual Machine) API Documentation ---------------------- The kvm API is a set of ioctls that are issued to control various aspects -of a virtual machine. The ioctls belong to three classes +of a virtual machine. The ioctls belong to three classes: - System ioctls: These query and set global attributes which affect the whole kvm subsystem. In addition a system ioctl is used to create - virtual machines + virtual machines. - VM ioctls: These query and set attributes that affect an entire virtual machine, for example memory layout. In addition a VM ioctl is used to create virtual cpus (vcpus). - Only run VM ioctls from the same process (address space) that was used - to create the VM. + VM ioctls must be issued from the same process (address space) that was + used to create the VM. - vcpu ioctls: These query and set attributes that control the operation of a single virtual cpu. - Only run vcpu ioctls from the same thread that was used to create the - vcpu. + vcpu ioctls should be issued from the same thread that was used to create + the vcpu, except for asynchronous vcpu ioctl that are marked as such in + the documentation. Otherwise, the first ioctl after switching threads + could see a performance impact. 2. File descriptors @@ -41,9 +43,8 @@ In general file descriptors can be migrated among processes by means of fork() and the SCM_RIGHTS facility of unix domain socket. These kinds of tricks are explicitly not supported by kvm. While they will not cause harm to the host, their actual behavior is not guaranteed by -the API. The only supported use is one virtual machine per process, -and one vcpu per thread. - +the API. See "General description" for details on the ioctl usage +model that is supported by KVM. It is important to note that althought VM ioctls may only be issued from the process that created the VM, a VM's lifecycle is associated with its @@ -515,11 +516,15 @@ c) KVM_INTERRUPT_SET_LEVEL Note that any value for 'irq' other than the ones stated above is invalid and incurs unexpected behavior. +This is an asynchronous vcpu ioctl and can be invoked from any thread. + MIPS: Queues an external interrupt to be injected into the virtual CPU. A negative interrupt number dequeues the interrupt. +This is an asynchronous vcpu ioctl and can be invoked from any thread. + 4.17 KVM_DEBUG_GUEST @@ -2493,7 +2498,7 @@ KVM_S390_MCHK (vm, vcpu) - machine check interrupt; cr 14 bits in parm, machine checks needing further payload are not supported by this ioctl) -Note that the vcpu ioctl is asynchronous to vcpu execution. +This is an asynchronous vcpu ioctl and can be invoked from any thread. 4.78 KVM_PPC_GET_HTAB_FD @@ -3042,8 +3047,7 @@ KVM_S390_INT_EMERGENCY - sigp emergency; parameters in .emerg KVM_S390_INT_EXTERNAL_CALL - sigp external call; parameters in .extcall KVM_S390_MCHK - machine check interrupt; parameters in .mchk - -Note that the vcpu ioctl is asynchronous to vcpu execution. +This is an asynchronous vcpu ioctl and can be invoked from any thread. 4.94 KVM_S390_GET_IRQ_STATE -- cgit From ddba91801aeb5c160b660caed1800eb3aef403f8 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 15 Feb 2019 12:48:39 -0800 Subject: KVM: Reject device ioctls from processes other than the VM's creator KVM's API requires thats ioctls must be issued from the same process that created the VM. In other words, userspace can play games with a VM's file descriptors, e.g. fork(), SCM_RIGHTS, etc..., but only the creator can do anything useful. Explicitly reject device ioctls that are issued by a process other than the VM's creator, and update KVM's API documentation to extend its requirements to device ioctls. Fixes: 852b6d57dc7f ("kvm: add device control API") Cc: Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 16 +++++++++++----- virt/kvm/kvm_main.c | 3 +++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 158805532083..88ecbc7645db 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -13,7 +13,7 @@ of a virtual machine. The ioctls belong to three classes: - VM ioctls: These query and set attributes that affect an entire virtual machine, for example memory layout. In addition a VM ioctl is used to - create virtual cpus (vcpus). + create virtual cpus (vcpus) and devices. VM ioctls must be issued from the same process (address space) that was used to create the VM. @@ -26,6 +26,11 @@ of a virtual machine. The ioctls belong to three classes: the documentation. Otherwise, the first ioctl after switching threads could see a performance impact. + - device ioctls: These query and set attributes that control the operation + of a single device. + + device ioctls must be issued from the same process (address space) that + was used to create the VM. 2. File descriptors ------------------- @@ -34,10 +39,11 @@ The kvm API is centered around file descriptors. An initial open("/dev/kvm") obtains a handle to the kvm subsystem; this handle can be used to issue system ioctls. A KVM_CREATE_VM ioctl on this handle will create a VM file descriptor which can be used to issue VM -ioctls. A KVM_CREATE_VCPU ioctl on a VM fd will create a virtual cpu -and return a file descriptor pointing to it. Finally, ioctls on a vcpu -fd can be used to control the vcpu, including the important task of -actually running guest code. +ioctls. A KVM_CREATE_VCPU or KVM_CREATE_DEVICE ioctl on a VM fd will +create a virtual cpu or device and return a file descriptor pointing to +the new resource. Finally, ioctls on a vcpu or device fd can be used +to control the vcpu or device. For vcpus, this includes the important +task of actually running guest code. In general file descriptors can be migrated among processes by means of fork() and the SCM_RIGHTS facility of unix domain socket. These diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f25aa98a94df..55fe8e20d8fd 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2905,6 +2905,9 @@ static long kvm_device_ioctl(struct file *filp, unsigned int ioctl, { struct kvm_device *dev = filp->private_data; + if (dev->kvm->mm != current->mm) + return -EIO; + switch (ioctl) { case KVM_SET_DEVICE_ATTR: return kvm_device_ioctl_attr(dev, dev->ops->set_attr, arg); -- cgit From 05d5a48635259e621ea26d01e8316c6feeb34190 Mon Sep 17 00:00:00 2001 From: "Singh, Brijesh" Date: Fri, 15 Feb 2019 17:24:12 +0000 Subject: KVM: SVM: Workaround errata#1096 (insn_len maybe zero on SMAP violation) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Errata#1096: On a nested data page fault when CR.SMAP=1 and the guest data read generates a SMAP violation, GuestInstrBytes field of the VMCB on a VMEXIT will incorrectly return 0h instead the correct guest instruction bytes . Recommend Workaround: To determine what instruction the guest was executing the hypervisor will have to decode the instruction at the instruction pointer. The recommended workaround can not be implemented for the SEV guest because guest memory is encrypted with the guest specific key, and instruction decoder will not be able to decode the instruction bytes. If we hit this errata in the SEV guest then log the message and request a guest shutdown. Reported-by: Venkatesh Srinivas Cc: Jim Mattson Cc: Tom Lendacky Cc: Borislav Petkov Cc: Joerg Roedel Cc: "Radim Krčmář" Cc: Paolo Bonzini Signed-off-by: Brijesh Singh Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/mmu.c | 8 +++++--- arch/x86/kvm/svm.c | 32 ++++++++++++++++++++++++++++++++ arch/x86/kvm/vmx/vmx.c | 6 ++++++ 4 files changed, 45 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 88f5192ce05e..5b03006c00be 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1192,6 +1192,8 @@ struct kvm_x86_ops { int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu, uint16_t *vmcs_version); uint16_t (*nested_get_evmcs_version)(struct kvm_vcpu *vcpu); + + bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu); }; struct kvm_arch_async_pf { diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 776a58b00682..f6d760dcdb75 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5408,10 +5408,12 @@ emulate: * This can happen if a guest gets a page-fault on data access but the HW * table walker is not able to read the instruction page (e.g instruction * page is not present in memory). In those cases we simply restart the - * guest. + * guest, with the exception of AMD Erratum 1096 which is unrecoverable. */ - if (unlikely(insn && !insn_len)) - return 1; + if (unlikely(insn && !insn_len)) { + if (!kvm_x86_ops->need_emulation_on_page_fault(vcpu)) + return 1; + } er = x86_emulate_instruction(vcpu, cr2, emulation_type, insn, insn_len); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b5b128a0a051..426039285fd1 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -7098,6 +7098,36 @@ static int nested_enable_evmcs(struct kvm_vcpu *vcpu, return -ENODEV; } +static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) +{ + bool is_user, smap; + + is_user = svm_get_cpl(vcpu) == 3; + smap = !kvm_read_cr4_bits(vcpu, X86_CR4_SMAP); + + /* + * Detect and workaround Errata 1096 Fam_17h_00_0Fh + * + * In non SEV guest, hypervisor will be able to read the guest + * memory to decode the instruction pointer when insn_len is zero + * so we return true to indicate that decoding is possible. + * + * But in the SEV guest, the guest memory is encrypted with the + * guest specific key and hypervisor will not be able to decode the + * instruction pointer so we will not able to workaround it. Lets + * print the error and request to kill the guest. + */ + if (is_user && smap) { + if (!sev_guest(vcpu->kvm)) + return true; + + pr_err_ratelimited("KVM: Guest triggered AMD Erratum 1096\n"); + kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); + } + + return false; +} + static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .cpu_has_kvm_support = has_svm, .disabled_by_bios = is_disabled, @@ -7231,6 +7261,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .nested_enable_evmcs = nested_enable_evmcs, .nested_get_evmcs_version = nested_get_evmcs_version, + + .need_emulation_on_page_fault = svm_need_emulation_on_page_fault, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index c73375e01ab8..6aa84e09217b 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -7409,6 +7409,11 @@ static int enable_smi_window(struct kvm_vcpu *vcpu) return 0; } +static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) +{ + return 0; +} + static __init int hardware_setup(void) { unsigned long host_bndcfgs; @@ -7711,6 +7716,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .set_nested_state = NULL, .get_vmcs12_pages = NULL, .nested_enable_evmcs = NULL, + .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault, }; static void vmx_cleanup_l1d_flush(void) -- cgit From 711eff3a8fa1d6193139a895524240912011b4dc Mon Sep 17 00:00:00 2001 From: Krish Sadhukhan Date: Thu, 7 Feb 2019 14:05:30 -0500 Subject: kvm: nVMX: Add a vmentry check for HOST_SYSENTER_ESP and HOST_SYSENTER_EIP fields According to section "Checks on VMX Controls" in Intel SDM vol 3C, the following check is performed on vmentry of L2 guests: On processors that support Intel 64 architecture, the IA32_SYSENTER_ESP field and the IA32_SYSENTER_EIP field must each contain a canonical address. Signed-off-by: Krish Sadhukhan Reviewed-by: Mihai Carabas Reviewed-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx/nested.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index f24a2c225070..153e539c29c9 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2585,6 +2585,11 @@ static int nested_check_host_control_regs(struct kvm_vcpu *vcpu, !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) || !nested_cr3_valid(vcpu, vmcs12->host_cr3)) return -EINVAL; + + if (is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu) || + is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu)) + return -EINVAL; + /* * If the load IA32_EFER VM-exit control is 1, bits reserved in the * IA32_EFER MSR must be 0 in the field for that register. In addition, -- cgit From 4d66623cfba0949b2f0d669bd2ae732124c99ded Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 27 Sep 2018 08:31:26 +0800 Subject: KVM: x86: remove check on nr_mmu_pages in kvm_arch_commit_memory_region() * nr_mmu_pages would be non-zero only if kvm->arch.n_requested_mmu_pages is non-zero. * nr_mmu_pages is always non-zero, since kvm_mmu_calculate_mmu_pages() never return zero. Based on these two reasons, we can merge the two *if* clause and use the return value from kvm_mmu_calculate_mmu_pages() directly. This simplify the code and also eliminate the possibility for reader to believe nr_mmu_pages would be zero. Signed-off-by: Wei Yang Reviewed-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/mmu.c | 2 +- arch/x86/kvm/x86.c | 8 ++------ 3 files changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5b03006c00be..679168931c40 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1254,7 +1254,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, gfn_t gfn_offset, unsigned long mask); void kvm_mmu_zap_all(struct kvm *kvm); void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen); -unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); +unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm); void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f6d760dcdb75..5a9981465fbb 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -6028,7 +6028,7 @@ out: /* * Calculate mmu pages needed for kvm. */ -unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm) +unsigned int kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm) { unsigned int nr_mmu_pages; unsigned int nr_pages = 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 65e4559eef2f..491e92383da8 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -9429,13 +9429,9 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_memory_slot *new, enum kvm_mr_change change) { - int nr_mmu_pages = 0; - if (!kvm->arch.n_requested_mmu_pages) - nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); - - if (nr_mmu_pages) - kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); + kvm_mmu_change_mmu_pages(kvm, + kvm_mmu_calculate_default_mmu_pages(kvm)); /* * Dirty logging tracks sptes in 4k granularity, meaning that large -- cgit From 3d9683cf3bfb6d4e4605a153958dfca7e18b52f2 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 18 Mar 2019 18:08:12 +0900 Subject: KVM: export and iif KVM is supported I do not see any consistency about headers_install of and . According to my analysis of Linux 5.1-rc1, there are 3 groups: [1] Both and are exported alpha, arm, hexagon, mips, powerpc, s390, sparc, x86 [2] is exported, but is not arc, arm64, c6x, h8300, ia64, m68k, microblaze, nios2, openrisc, parisc, sh, unicore32, xtensa [3] Neither nor is exported csky, nds32, riscv This does not match to the actual KVM support. At least, [2] is half-baked. Nor do arch maintainers look like they care about this. For example, commit 0add53713b1c ("microblaze: Add missing kvm_para.h to Kbuild") exported to user-space in order to fix an in-kernel build error. We have two ways to make this consistent: [A] export both and for all architectures, irrespective of the KVM support [B] Match the header export of and to the KVM support My first attempt was [A] because the code looks cleaner, but Paolo suggested [B]. So, this commit goes with [B]. For most architectures, was moved to the kernel-space. I changed include/uapi/linux/Kbuild so that it checks generated asm/kvm_para.h as well as check-in ones. After this commit, there will be two groups: [1] Both and are exported arm, arm64, mips, powerpc, s390, x86 [2] Neither nor is exported alpha, arc, c6x, csky, h8300, hexagon, ia64, m68k, microblaze, nds32, nios2, openrisc, parisc, riscv, sh, sparc, unicore32, xtensa Signed-off-by: Masahiro Yamada Acked-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- arch/alpha/include/asm/Kbuild | 1 + arch/alpha/include/uapi/asm/kvm_para.h | 2 -- arch/arc/include/asm/Kbuild | 1 + arch/arc/include/uapi/asm/Kbuild | 1 - arch/arm/include/uapi/asm/Kbuild | 1 + arch/arm/include/uapi/asm/kvm_para.h | 2 -- arch/c6x/include/asm/Kbuild | 1 + arch/c6x/include/uapi/asm/Kbuild | 1 - arch/h8300/include/asm/Kbuild | 1 + arch/h8300/include/uapi/asm/Kbuild | 1 - arch/hexagon/include/asm/Kbuild | 1 + arch/hexagon/include/uapi/asm/kvm_para.h | 2 -- arch/ia64/include/asm/Kbuild | 1 + arch/ia64/include/uapi/asm/Kbuild | 1 - arch/m68k/include/asm/Kbuild | 1 + arch/m68k/include/uapi/asm/Kbuild | 1 - arch/microblaze/include/asm/Kbuild | 1 + arch/microblaze/include/uapi/asm/Kbuild | 1 - arch/nios2/include/asm/Kbuild | 1 + arch/nios2/include/uapi/asm/Kbuild | 1 - arch/openrisc/include/asm/Kbuild | 1 + arch/openrisc/include/uapi/asm/Kbuild | 1 - arch/parisc/include/asm/Kbuild | 1 + arch/parisc/include/uapi/asm/Kbuild | 1 - arch/sh/include/asm/Kbuild | 1 + arch/sh/include/uapi/asm/Kbuild | 1 - arch/sparc/include/asm/Kbuild | 1 + arch/sparc/include/uapi/asm/kvm_para.h | 2 -- arch/unicore32/include/asm/Kbuild | 1 + arch/unicore32/include/uapi/asm/Kbuild | 1 - arch/xtensa/include/asm/Kbuild | 1 + arch/xtensa/include/uapi/asm/Kbuild | 1 - include/uapi/linux/Kbuild | 2 ++ 33 files changed, 18 insertions(+), 20 deletions(-) delete mode 100644 arch/alpha/include/uapi/asm/kvm_para.h delete mode 100644 arch/arm/include/uapi/asm/kvm_para.h delete mode 100644 arch/hexagon/include/uapi/asm/kvm_para.h delete mode 100644 arch/sparc/include/uapi/asm/kvm_para.h diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index dc0ab28baca1..70b783333965 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -6,6 +6,7 @@ generic-y += exec.h generic-y += export.h generic-y += fb.h generic-y += irq_work.h +generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += preempt.h diff --git a/arch/alpha/include/uapi/asm/kvm_para.h b/arch/alpha/include/uapi/asm/kvm_para.h deleted file mode 100644 index baacc4996d18..000000000000 --- a/arch/alpha/include/uapi/asm/kvm_para.h +++ /dev/null @@ -1,2 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#include diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index b41f8881ecc8..decc306a3b52 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -11,6 +11,7 @@ generic-y += hardirq.h generic-y += hw_irq.h generic-y += irq_regs.h generic-y += irq_work.h +generic-y += kvm_para.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/arc/include/uapi/asm/Kbuild b/arch/arc/include/uapi/asm/Kbuild index 755bb11323d8..1c72f04ff75d 100644 --- a/arch/arc/include/uapi/asm/Kbuild +++ b/arch/arc/include/uapi/asm/Kbuild @@ -1,2 +1 @@ -generic-y += kvm_para.h generic-y += ucontext.h diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild index 23b4464c0995..ce8573157774 100644 --- a/arch/arm/include/uapi/asm/Kbuild +++ b/arch/arm/include/uapi/asm/Kbuild @@ -3,3 +3,4 @@ generated-y += unistd-common.h generated-y += unistd-oabi.h generated-y += unistd-eabi.h +generic-y += kvm_para.h diff --git a/arch/arm/include/uapi/asm/kvm_para.h b/arch/arm/include/uapi/asm/kvm_para.h deleted file mode 100644 index baacc4996d18..000000000000 --- a/arch/arm/include/uapi/asm/kvm_para.h +++ /dev/null @@ -1,2 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#include diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 63b4a1705182..249c9f6f26dc 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -19,6 +19,7 @@ generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kprobes.h +generic-y += kvm_para.h generic-y += local.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild index 755bb11323d8..1c72f04ff75d 100644 --- a/arch/c6x/include/uapi/asm/Kbuild +++ b/arch/c6x/include/uapi/asm/Kbuild @@ -1,2 +1 @@ -generic-y += kvm_para.h generic-y += ucontext.h diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index 3e7c8ecf151e..e3dead402e5f 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -23,6 +23,7 @@ generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kprobes.h +generic-y += kvm_para.h generic-y += linkage.h generic-y += local.h generic-y += local64.h diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild index 755bb11323d8..1c72f04ff75d 100644 --- a/arch/h8300/include/uapi/asm/Kbuild +++ b/arch/h8300/include/uapi/asm/Kbuild @@ -1,2 +1 @@ -generic-y += kvm_para.h generic-y += ucontext.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index b25fd42aa0f4..d046e8ccdf78 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -19,6 +19,7 @@ generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kprobes.h +generic-y += kvm_para.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/hexagon/include/uapi/asm/kvm_para.h b/arch/hexagon/include/uapi/asm/kvm_para.h deleted file mode 100644 index baacc4996d18..000000000000 --- a/arch/hexagon/include/uapi/asm/kvm_para.h +++ /dev/null @@ -1,2 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#include diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index 43e21fe3499c..11f191689c9e 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -2,6 +2,7 @@ generated-y += syscall_table.h generic-y += compat.h generic-y += exec.h generic-y += irq_work.h +generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h generic-y += preempt.h diff --git a/arch/ia64/include/uapi/asm/Kbuild b/arch/ia64/include/uapi/asm/Kbuild index 20018cb883a9..62a9522af51e 100644 --- a/arch/ia64/include/uapi/asm/Kbuild +++ b/arch/ia64/include/uapi/asm/Kbuild @@ -1,2 +1 @@ generated-y += unistd_64.h -generic-y += kvm_para.h diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 95f8f631c4df..2c359d9e80f6 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -13,6 +13,7 @@ generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kprobes.h +generic-y += kvm_para.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild index 8a7ad40be463..7417847dc438 100644 --- a/arch/m68k/include/uapi/asm/Kbuild +++ b/arch/m68k/include/uapi/asm/Kbuild @@ -1,2 +1 @@ generated-y += unistd_32.h -generic-y += kvm_para.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 791cc8d54d0a..1a8285c3f693 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -17,6 +17,7 @@ generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kprobes.h +generic-y += kvm_para.h generic-y += linkage.h generic-y += local.h generic-y += local64.h diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild index 3ce84fbb2678..13f59631c576 100644 --- a/arch/microblaze/include/uapi/asm/Kbuild +++ b/arch/microblaze/include/uapi/asm/Kbuild @@ -1,3 +1,2 @@ generated-y += unistd_32.h -generic-y += kvm_para.h generic-y += ucontext.h diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild index 8fde4fa2c34f..88a667d12aaa 100644 --- a/arch/nios2/include/asm/Kbuild +++ b/arch/nios2/include/asm/Kbuild @@ -23,6 +23,7 @@ generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kprobes.h +generic-y += kvm_para.h generic-y += local.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h diff --git a/arch/nios2/include/uapi/asm/Kbuild b/arch/nios2/include/uapi/asm/Kbuild index 755bb11323d8..1c72f04ff75d 100644 --- a/arch/nios2/include/uapi/asm/Kbuild +++ b/arch/nios2/include/uapi/asm/Kbuild @@ -1,2 +1 @@ -generic-y += kvm_para.h generic-y += ucontext.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 5a73e2956ac4..22aa97136c01 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -20,6 +20,7 @@ generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kprobes.h +generic-y += kvm_para.h generic-y += local.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild index 755bb11323d8..1c72f04ff75d 100644 --- a/arch/openrisc/include/uapi/asm/Kbuild +++ b/arch/openrisc/include/uapi/asm/Kbuild @@ -1,2 +1 @@ -generic-y += kvm_para.h generic-y += ucontext.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index 6f49e77d82a2..9bcd0c903dbb 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -11,6 +11,7 @@ generic-y += irq_regs.h generic-y += irq_work.h generic-y += kdebug.h generic-y += kprobes.h +generic-y += kvm_para.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/parisc/include/uapi/asm/Kbuild b/arch/parisc/include/uapi/asm/Kbuild index 22fdbd08cdc8..2bd5b392277c 100644 --- a/arch/parisc/include/uapi/asm/Kbuild +++ b/arch/parisc/include/uapi/asm/Kbuild @@ -1,3 +1,2 @@ generated-y += unistd_32.h generated-y += unistd_64.h -generic-y += kvm_para.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index a6ef3fee5f85..7bf2cb680d32 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -9,6 +9,7 @@ generic-y += emergency-restart.h generic-y += exec.h generic-y += irq_regs.h generic-y += irq_work.h +generic-y += kvm_para.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/sh/include/uapi/asm/Kbuild b/arch/sh/include/uapi/asm/Kbuild index ecfbd40924dd..b8812c74c1de 100644 --- a/arch/sh/include/uapi/asm/Kbuild +++ b/arch/sh/include/uapi/asm/Kbuild @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 generated-y += unistd_32.h -generic-y += kvm_para.h generic-y += ucontext.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index b82f64e28f55..a22cfd5c0ee8 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -9,6 +9,7 @@ generic-y += exec.h generic-y += export.h generic-y += irq_regs.h generic-y += irq_work.h +generic-y += kvm_para.h generic-y += linkage.h generic-y += local.h generic-y += local64.h diff --git a/arch/sparc/include/uapi/asm/kvm_para.h b/arch/sparc/include/uapi/asm/kvm_para.h deleted file mode 100644 index baacc4996d18..000000000000 --- a/arch/sparc/include/uapi/asm/kvm_para.h +++ /dev/null @@ -1,2 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#include diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index 1d1544b6ca74..d77d953c04c1 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -18,6 +18,7 @@ generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kprobes.h +generic-y += kvm_para.h generic-y += local.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild index 755bb11323d8..1c72f04ff75d 100644 --- a/arch/unicore32/include/uapi/asm/Kbuild +++ b/arch/unicore32/include/uapi/asm/Kbuild @@ -1,2 +1 @@ -generic-y += kvm_para.h generic-y += ucontext.h diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 42b6cb3d16f7..3843198e03d4 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -15,6 +15,7 @@ generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kprobes.h +generic-y += kvm_para.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/xtensa/include/uapi/asm/Kbuild b/arch/xtensa/include/uapi/asm/Kbuild index 8a7ad40be463..7417847dc438 100644 --- a/arch/xtensa/include/uapi/asm/Kbuild +++ b/arch/xtensa/include/uapi/asm/Kbuild @@ -1,2 +1 @@ generated-y += unistd_32.h -generic-y += kvm_para.h diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 5f24b50c9e88..059dc2bedaf6 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -7,5 +7,7 @@ no-export-headers += kvm.h endif ifeq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/kvm_para.h),) +ifeq ($(wildcard $(objtree)/arch/$(SRCARCH)/include/generated/uapi/asm/kvm_para.h),) no-export-headers += kvm_para.h endif +endif -- cgit From f285c633cb6d68d2bf3a8ad65bee3835aac9886c Mon Sep 17 00:00:00 2001 From: Ben Gardon Date: Tue, 12 Mar 2019 11:45:59 -0700 Subject: kvm: mmu: Used range based flushing in slot_handle_level_range Replace kvm_flush_remote_tlbs with kvm_flush_remote_tlbs_with_address in slot_handle_level_range. When range based flushes are not enabled kvm_flush_remote_tlbs_with_address falls back to kvm_flush_remote_tlbs. This changes the behavior of many functions that indirectly use slot_handle_level_range, iff the range based flushes are enabled. The only potential problem I see with this is that kvm->tlbs_dirty will be cleared less often, however the only caller of slot_handle_level_range that checks tlbs_dirty is kvm_mmu_notifier_invalidate_range_start which checks it and does a kvm_flush_remote_tlbs after calling kvm_unmap_hva_range anyway. Tested: Ran all kvm-unit-tests on a Intel Haswell machine with and without this patch. The patch introduced no new failures. Signed-off-by: Ben Gardon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5a9981465fbb..eee455a8a612 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5526,7 +5526,9 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, if (need_resched() || spin_needbreak(&kvm->mmu_lock)) { if (flush && lock_flush_tlb) { - kvm_flush_remote_tlbs(kvm); + kvm_flush_remote_tlbs_with_address(kvm, + start_gfn, + iterator.gfn - start_gfn + 1); flush = false; } cond_resched_lock(&kvm->mmu_lock); @@ -5534,7 +5536,8 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot, } if (flush && lock_flush_tlb) { - kvm_flush_remote_tlbs(kvm); + kvm_flush_remote_tlbs_with_address(kvm, start_gfn, + end_gfn - start_gfn + 1); flush = false; } -- cgit From ca0488aadd014809862428cde896a6a6e8f13e42 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 15 Mar 2019 18:58:15 +0100 Subject: kvm: don't redefine flags as something else MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function irqfd_wakeup() has flags defined as __poll_t and then it has additional flags which is used for irqflags. Redefine the inner flags variable as iflags so it does not shadow the outer flags. Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: kvm@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Paolo Bonzini --- virt/kvm/eventfd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 4325250afd72..001aeda4c154 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -214,9 +214,9 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) if (flags & EPOLLHUP) { /* The eventfd is closing, detach from KVM */ - unsigned long flags; + unsigned long iflags; - spin_lock_irqsave(&kvm->irqfds.lock, flags); + spin_lock_irqsave(&kvm->irqfds.lock, iflags); /* * We must check if someone deactivated the irqfd before @@ -230,7 +230,7 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) if (irqfd_is_active(irqfd)) irqfd_deactivate(irqfd); - spin_unlock_irqrestore(&kvm->irqfds.lock, flags); + spin_unlock_irqrestore(&kvm->irqfds.lock, iflags); } return 0; -- cgit From 0cf9135b773bf32fba9dd8e6699c1b331ee4b749 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 7 Mar 2019 15:43:02 -0800 Subject: KVM: x86: Emulate MSR_IA32_ARCH_CAPABILITIES on AMD hosts The CPUID flag ARCH_CAPABILITIES is unconditioinally exposed to host userspace for all x86 hosts, i.e. KVM advertises ARCH_CAPABILITIES regardless of hardware support under the pretense that KVM fully emulates MSR_IA32_ARCH_CAPABILITIES. Unfortunately, only VMX hosts handle accesses to MSR_IA32_ARCH_CAPABILITIES (despite KVM_GET_MSRS also reporting MSR_IA32_ARCH_CAPABILITIES for all hosts). Move the MSR_IA32_ARCH_CAPABILITIES handling to common x86 code so that it's emulated on AMD hosts. Fixes: 1eaafe91a0df4 ("kvm: x86: IA32_ARCH_CAPABILITIES is always supported") Cc: stable@vger.kernel.org Reported-by: Xiaoyao Li Cc: Jim Mattson Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/vmx/vmx.c | 13 ------------- arch/x86/kvm/vmx/vmx.h | 1 - arch/x86/kvm/x86.c | 12 ++++++++++++ 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 679168931c40..264814f26ce0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -568,6 +568,7 @@ struct kvm_vcpu_arch { bool tpr_access_reporting; u64 ia32_xss; u64 microcode_version; + u64 arch_capabilities; /* * Paging state of the vcpu diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 6aa84e09217b..ab432a930ae8 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1683,12 +1683,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = to_vmx(vcpu)->spec_ctrl; break; - case MSR_IA32_ARCH_CAPABILITIES: - if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) - return 1; - msr_info->data = to_vmx(vcpu)->arch_capabilities; - break; case MSR_IA32_SYSENTER_CS: msr_info->data = vmcs_read32(GUEST_SYSENTER_CS); break; @@ -1895,11 +1889,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmx_disable_intercept_for_msr(vmx->vmcs01.msr_bitmap, MSR_IA32_PRED_CMD, MSR_TYPE_W); break; - case MSR_IA32_ARCH_CAPABILITIES: - if (!msr_info->host_initiated) - return 1; - vmx->arch_capabilities = data; - break; case MSR_IA32_CR_PAT: if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data)) @@ -4088,8 +4077,6 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx) ++vmx->nmsrs; } - vmx->arch_capabilities = kvm_get_arch_capabilities(); - vm_exit_controls_init(vmx, vmx_vmexit_ctrl()); /* 22.2.1, 20.8.1 */ diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 1554cb45b393..a1e00d0a2482 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -190,7 +190,6 @@ struct vcpu_vmx { u64 msr_guest_kernel_gs_base; #endif - u64 arch_capabilities; u64 spec_ctrl; u32 vm_entry_controls_shadow; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 491e92383da8..9fc378531ca7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2443,6 +2443,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (msr_info->host_initiated) vcpu->arch.microcode_version = data; break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated) + return 1; + vcpu->arch.arch_capabilities = data; + break; case MSR_EFER: return set_efer(vcpu, data); case MSR_K7_HWCR: @@ -2747,6 +2752,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_UCODE_REV: msr_info->data = vcpu->arch.microcode_version; break; + case MSR_IA32_ARCH_CAPABILITIES: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_ARCH_CAPABILITIES)) + return 1; + msr_info->data = vcpu->arch.arch_capabilities; + break; case MSR_IA32_TSC: msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset; break; @@ -8733,6 +8744,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) { + vcpu->arch.arch_capabilities = kvm_get_arch_capabilities(); vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT; kvm_vcpu_mtrr_init(vcpu); vcpu_load(vcpu); -- cgit From 2bdb76c015df7125783d8394d6339d181cb5bc30 Mon Sep 17 00:00:00 2001 From: Xiaoyao Li Date: Fri, 8 Mar 2019 15:57:20 +0800 Subject: kvm/x86: Move MSR_IA32_ARCH_CAPABILITIES to array emulated_msrs Since MSR_IA32_ARCH_CAPABILITIES is emualted unconditionally even if host doesn't suppot it. We should move it to array emulated_msrs from arry msrs_to_save, to report to userspace that guest support this msr. Signed-off-by: Xiaoyao Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9fc378531ca7..9f279bb145e5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1125,7 +1125,7 @@ static u32 msrs_to_save[] = { #endif MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS, MSR_TSC_AUX, - MSR_IA32_SPEC_CTRL, MSR_IA32_ARCH_CAPABILITIES, + MSR_IA32_SPEC_CTRL, MSR_IA32_RTIT_CTL, MSR_IA32_RTIT_STATUS, MSR_IA32_RTIT_CR3_MATCH, MSR_IA32_RTIT_OUTPUT_BASE, MSR_IA32_RTIT_OUTPUT_MASK, MSR_IA32_RTIT_ADDR0_A, MSR_IA32_RTIT_ADDR0_B, @@ -1158,6 +1158,7 @@ static u32 emulated_msrs[] = { MSR_IA32_TSC_ADJUST, MSR_IA32_TSCDEADLINE, + MSR_IA32_ARCH_CAPABILITIES, MSR_IA32_MISC_ENABLE, MSR_IA32_MCG_STATUS, MSR_IA32_MCG_CTL, -- cgit From 013cc6ebbf41496ce4badedd71ea6d4a6d198c14 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 13 Mar 2019 18:13:42 +0100 Subject: x86/kvm/hyper-v: avoid spurious pending stimer on vCPU init When userspace initializes guest vCPUs it may want to zero all supported MSRs including Hyper-V related ones including HV_X64_MSR_STIMERn_CONFIG/ HV_X64_MSR_STIMERn_COUNT. With commit f3b138c5d89a ("kvm/x86: Update SynIC timers on guest entry only") we began doing stimer_mark_pending() unconditionally on every config change. The issue I'm observing manifests itself as following: - Qemu writes 0 to STIMERn_{CONFIG,COUNT} MSRs and marks all stimers as pending in stimer_pending_bitmap, arms KVM_REQ_HV_STIMER; - kvm_hv_has_stimer_pending() starts returning true; - kvm_vcpu_has_events() starts returning true; - kvm_arch_vcpu_runnable() starts returning true; - when kvm_arch_vcpu_ioctl_run() gets into (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED) case: - kvm_vcpu_block() gets in 'kvm_vcpu_check_block(vcpu) < 0' and returns immediately, avoiding normal wait path; - -EAGAIN is returned from kvm_arch_vcpu_ioctl_run() immediately forcing userspace to retry. So instead of normal wait path we get a busy loop on all secondary vCPUs before they get INIT signal. This seems to be undesirable, especially given that this happens even when Hyper-V extensions are not used. Generally, it seems to be pointless to mark an stimer as pending in stimer_pending_bitmap and arm KVM_REQ_HV_STIMER as the only thing kvm_hv_process_stimers() will do is clear the corresponding bit. We may just not mark disabled timers as pending instead. Fixes: f3b138c5d89a ("kvm/x86: Update SynIC timers on guest entry only") Signed-off-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 27c43525a05f..421899f6ad7b 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -526,7 +526,9 @@ static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config, new_config.enable = 0; stimer->config.as_uint64 = new_config.as_uint64; - stimer_mark_pending(stimer, false); + if (stimer->config.enable) + stimer_mark_pending(stimer, false); + return 0; } @@ -542,7 +544,10 @@ static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count, stimer->config.enable = 0; else if (stimer->config.auto_enable) stimer->config.enable = 1; - stimer_mark_pending(stimer, false); + + if (stimer->config.enable) + stimer_mark_pending(stimer, false); + return 0; } -- cgit From 45def77ebf79e2e8942b89ed79294d97ce914fa0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Mon, 11 Mar 2019 20:01:05 -0700 Subject: KVM: x86: update %rip after emulating IO Most (all?) x86 platforms provide a port IO based reset mechanism, e.g. OUT 92h or CF9h. Userspace may emulate said mechanism, i.e. reset a vCPU in response to KVM_EXIT_IO, without explicitly announcing to KVM that it is doing a reset, e.g. Qemu jams vCPU state and resumes running. To avoid corruping %rip after such a reset, commit 0967b7bf1c22 ("KVM: Skip pio instruction when it is emulated, not executed") changed the behavior of PIO handlers, i.e. today's "fast" PIO handling to skip the instruction prior to exiting to userspace. Full emulation doesn't need such tricks becase re-emulating the instruction will naturally handle %rip being changed to point at the reset vector. Updating %rip prior to executing to userspace has several drawbacks: - Userspace sees the wrong %rip on the exit, e.g. if PIO emulation fails it will likely yell about the wrong address. - Single step exits to userspace for are effectively dropped as KVM_EXIT_DEBUG is overwritten with KVM_EXIT_IO. - Behavior of PIO emulation is different depending on whether it goes down the fast path or the slow path. Rather than skip the PIO instruction before exiting to userspace, snapshot the linear %rip and cancel PIO completion if the current value does not match the snapshot. For a 64-bit vCPU, i.e. the most common scenario, the snapshot and comparison has negligible overhead as VMCS.GUEST_RIP will be cached regardless, i.e. there is no extra VMREAD in this case. All other alternatives to snapshotting the linear %rip that don't rely on an explicit reset announcenment suffer from one corner case or another. For example, canceling PIO completion on any write to %rip fails if userspace does a save/restore of %rip, and attempting to avoid that issue by canceling PIO only if %rip changed then fails if PIO collides with the reset %rip. Attempting to zero in on the exact reset vector won't work for APs, which means adding more hooks such as the vCPU's MP_STATE, and so on and so forth. Checking for a linear %rip match technically suffers from corner cases, e.g. userspace could theoretically rewrite the underlying code page and expect a different instruction to execute, or the guest hardcodes a PIO reset at 0xfffffff0, but those are far, far outside of what can be considered normal operation. Fixes: 432baf60eee3 ("KVM: VMX: use kvm_fast_pio_in for handling IN I/O") Cc: Reported-by: Jim Mattson Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 36 ++++++++++++++++++++++++++---------- 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 264814f26ce0..159b5988292f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -350,6 +350,7 @@ struct kvm_mmu_page { }; struct kvm_pio_request { + unsigned long linear_rip; unsigned long count; int in; int port; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9f279bb145e5..099b851dabaf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6535,14 +6535,27 @@ int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, } EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer); +static int complete_fast_pio_out(struct kvm_vcpu *vcpu) +{ + vcpu->arch.pio.count = 0; + + if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip))) + return 1; + + return kvm_skip_emulated_instruction(vcpu); +} + static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port) { unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, size, port, &val, 1); - /* do not return to emulator after return from userspace */ - vcpu->arch.pio.count = 0; + + if (!ret) { + vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu); + vcpu->arch.complete_userspace_io = complete_fast_pio_out; + } return ret; } @@ -6553,6 +6566,11 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu) /* We should only ever be called with arch.pio.count equal to 1 */ BUG_ON(vcpu->arch.pio.count != 1); + if (unlikely(!kvm_is_linear_rip(vcpu, vcpu->arch.pio.linear_rip))) { + vcpu->arch.pio.count = 0; + return 1; + } + /* For size less than 4 we merge, else we zero extend */ val = (vcpu->arch.pio.size < 4) ? kvm_register_read(vcpu, VCPU_REGS_RAX) : 0; @@ -6565,7 +6583,7 @@ static int complete_fast_pio_in(struct kvm_vcpu *vcpu) vcpu->arch.pio.port, &val, 1); kvm_register_write(vcpu, VCPU_REGS_RAX, val); - return 1; + return kvm_skip_emulated_instruction(vcpu); } static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, @@ -6584,6 +6602,7 @@ static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, return ret; } + vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu); vcpu->arch.complete_userspace_io = complete_fast_pio_in; return 0; @@ -6591,16 +6610,13 @@ static int kvm_fast_pio_in(struct kvm_vcpu *vcpu, int size, int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in) { - int ret = kvm_skip_emulated_instruction(vcpu); + int ret; - /* - * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered - * KVM_EXIT_DEBUG here. - */ if (in) - return kvm_fast_pio_in(vcpu, size, port) && ret; + ret = kvm_fast_pio_in(vcpu, size, port); else - return kvm_fast_pio_out(vcpu, size, port) && ret; + ret = kvm_fast_pio_out(vcpu, size, port); + return ret && kvm_skip_emulated_instruction(vcpu); } EXPORT_SYMBOL_GPL(kvm_fast_pio); -- cgit From 8df98ae0ab2ead9a02228756eec26f8d7b17f499 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 13 Mar 2019 13:19:26 -0700 Subject: KVM: selftests: assert on exit reason in CR4/cpuid sync test ...so that the test doesn't end up in an infinite loop if it fails for whatever reason, e.g. SHUTDOWN due to gcc inserting stack canary code into ucall() and attempting to derefence a null segment. Fixes: ca359066889f7 ("kvm: selftests: add cr4_cpuid_sync_test") Cc: Wei Huang Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- .../selftests/kvm/x86_64/cr4_cpuid_sync_test.c | 35 ++++++++++++---------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c index d503a51fad30..7c2c4d4055a8 100644 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c @@ -87,22 +87,25 @@ int main(int argc, char *argv[]) while (1) { rc = _vcpu_run(vm, VCPU_ID); - if (run->exit_reason == KVM_EXIT_IO) { - switch (get_ucall(vm, VCPU_ID, &uc)) { - case UCALL_SYNC: - /* emulate hypervisor clearing CR4.OSXSAVE */ - vcpu_sregs_get(vm, VCPU_ID, &sregs); - sregs.cr4 &= ~X86_CR4_OSXSAVE; - vcpu_sregs_set(vm, VCPU_ID, &sregs); - break; - case UCALL_ABORT: - TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit."); - break; - case UCALL_DONE: - goto done; - default: - TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); - } + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Unexpected exit reason: %u (%s),\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_SYNC: + /* emulate hypervisor clearing CR4.OSXSAVE */ + vcpu_sregs_get(vm, VCPU_ID, &sregs); + sregs.cr4 &= ~X86_CR4_OSXSAVE; + vcpu_sregs_set(vm, VCPU_ID, &sregs); + break; + case UCALL_ABORT: + TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit."); + break; + case UCALL_DONE: + goto done; + default: + TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd); } } -- cgit From 0a3f29b5a77d6c27796d7a7adabafd199dc066d5 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 13 Mar 2019 16:19:30 -0700 Subject: KVM: selftests: explicitly disable PIE for tests KVM selftests embed the guest "image" as a function in the test itself and extract the guest code at runtime by manually parsing the elf headers. The parsing is very simple and doesn't supporting fancy things like position independent executables. Recent versions of gcc enable pie by default, which results in triple fault shutdowns in the guest due to the virtual address in the headers not matching up with the virtual address retrieved from the function pointer. Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 3c1f4bdf9000..73d59c9d94a3 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -29,7 +29,7 @@ LIBKVM += $(LIBKVM_$(UNAME_M)) INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include -CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$( Date: Wed, 13 Mar 2019 12:43:14 -0700 Subject: KVM: selftests: disable stack protector for all KVM tests Since 4.8.3, gcc has enabled -fstack-protector by default. This is problematic for the KVM selftests as they do not configure fs or gs segments (the stack canary is pulled from fs:0x28). With the default behavior, gcc will insert a stack canary on any function that creates buffers of 8 bytes or more. As a result, ucall() will hit a triple fault shutdown due to reading a bad fs segment when inserting its stack canary, i.e. every test fails with an unexpected SHUTDOWN. Fixes: 14c47b7530e2d ("kvm: selftests: introduce ucall") Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 73d59c9d94a3..7514fcea91a7 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -29,8 +29,8 @@ LIBKVM += $(LIBKVM_$(UNAME_M)) INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include -CFLAGS += -O2 -g -std=gnu99 -no-pie -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude -I$( Date: Wed, 13 Mar 2019 16:49:31 -0700 Subject: KVM: selftests: complete IO before migrating guest state Documentation/virtual/kvm/api.txt states: NOTE: For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR and KVM_EXIT_EPR the corresponding operations are complete (and guest state is consistent) only after userspace has re-entered the kernel with KVM_RUN. The kernel side will first finish incomplete operations and then check for pending signals. Userspace can re-enter the guest with an unmasked signal pending to complete pending operations. Because guest state may be inconsistent, starting state migration after an IO exit without first completing IO may result in test failures, e.g. a proposed change to KVM's handling of %rip in its fast PIO handling[1] will cause the new VM, i.e. the post-migration VM, to have its %rip set to the IN instruction that triggered KVM_EXIT_IO, leading to a test assertion due to a stage mismatch. For simplicitly, require KVM_CAP_IMMEDIATE_EXIT to complete IO and skip the test if it's not available. The addition of KVM_CAP_IMMEDIATE_EXIT predates the state selftest by more than a year. [1] https://patchwork.kernel.org/patch/10848545/ Fixes: fa3899add1056 ("kvm: selftests: add basic test for state save and restore") Reported-by: Jim Mattson Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/include/kvm_util.h | 1 + tools/testing/selftests/kvm/lib/kvm_util.c | 16 ++++++++++++++++ tools/testing/selftests/kvm/x86_64/state_test.c | 18 ++++++++++++++++-- 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index a84785b02557..07b71ad9734a 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -102,6 +102,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva); struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid); +void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_mp_state *mp_state); void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index b52cfdefecbf..efa0aad8b3c6 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -1121,6 +1121,22 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) return rc; } +void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpuid); + int ret; + + TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); + + vcpu->state->immediate_exit = 1; + ret = ioctl(vcpu->fd, KVM_RUN, NULL); + vcpu->state->immediate_exit = 0; + + TEST_ASSERT(ret == -1 && errno == EINTR, + "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i", + ret, errno); +} + /* * VM VCPU Set MP State * diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 4b3f556265f1..30f75856cf39 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -134,6 +134,11 @@ int main(int argc, char *argv[]) struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); + if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) { + fprintf(stderr, "immediate_exit not available, skipping test\n"); + exit(KSFT_SKIP); + } + /* Create VM */ vm = vm_create_default(VCPU_ID, 0, guest_code); vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); @@ -156,8 +161,6 @@ int main(int argc, char *argv[]) stage, run->exit_reason, exit_reason_str(run->exit_reason)); - memset(®s1, 0, sizeof(regs1)); - vcpu_regs_get(vm, VCPU_ID, ®s1); switch (get_ucall(vm, VCPU_ID, &uc)) { case UCALL_ABORT: TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0], @@ -176,6 +179,17 @@ int main(int argc, char *argv[]) uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx", stage, (ulong)uc.args[1]); + /* + * When KVM exits to userspace with KVM_EXIT_IO, KVM guarantees + * guest state is consistent only after userspace re-enters the + * kernel with KVM_RUN. Complete IO prior to migrating state + * to a new VM. + */ + vcpu_run_complete_io(vm, VCPU_ID); + + memset(®s1, 0, sizeof(regs1)); + vcpu_regs_get(vm, VCPU_ID, ®s1); + state = vcpu_save_state(vm, VCPU_ID); kvm_vm_release(vm); -- cgit From 919f6cd8bb2fe7151f8aecebc3b3d1ca2567396e Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 15 Feb 2019 12:48:40 -0800 Subject: KVM: doc: Document the life cycle of a VM and its resources The series to add memcg accounting to KVM allocations[1] states: There are many KVM kernel memory allocations which are tied to the life of the VM process and should be charged to the VM process's cgroup. While it is correct to account KVM kernel allocations to the cgroup of the process that created the VM, it's technically incorrect to state that the KVM kernel memory allocations are tied to the life of the VM process. This is because the VM itself, i.e. struct kvm, is not tied to the life of the process which created it, rather it is tied to the life of its associated file descriptor. In other words, kvm_destroy_vm() is not invoked until fput() decrements its associated file's refcount to zero. A simple example is to fork() in Qemu and have the child sleep indefinitely; kvm_destroy_vm() isn't called until Qemu closes its file descriptor *and* the rogue child is killed. The allocations are guaranteed to be *accounted* to the process which created the VM, but only because KVM's per-{VM,vCPU} ioctls reject the ioctl() with -EIO if kvm->mm != current->mm. I.e. the child can keep the VM "alive" but can't do anything useful with its reference. Note that because 'struct kvm' also holds a reference to the mm_struct of its owner, the above behavior also applies to userspace allocations. Given that mucking with a VM's file descriptor can lead to subtle and undesirable behavior, e.g. memcg charges persisting after a VM is shut down, explicitly document a VM's lifecycle and its impact on the VM's resources. Alternatively, KVM could aggressively free resources when the creating process exits, e.g. via mmu_notifier->release(). However, mmu_notifier isn't guaranteed to be available, and freeing resources when the creator exits is likely to be error prone and fragile as KVM would need to ensure that it only freed resources that are truly out of reach. In practice, the existing behavior shouldn't be problematic as a properly configured system will prevent a child process from being moved out of the appropriate cgroup hierarchy, i.e. prevent hiding the process from the OOM killer, and will prevent an unprivileged user from being able to to hold a reference to struct kvm via another method, e.g. debugfs. [1]https://patchwork.kernel.org/patch/10806707/ Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 88ecbc7645db..f39969d0121e 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -69,6 +69,23 @@ by and on behalf of the VM's process may not be freed/unaccounted when the VM is shut down. +It is important to note that althought VM ioctls may only be issued from +the process that created the VM, a VM's lifecycle is associated with its +file descriptor, not its creator (process). In other words, the VM and +its resources, *including the associated address space*, are not freed +until the last reference to the VM's file descriptor has been released. +For example, if fork() is issued after ioctl(KVM_CREATE_VM), the VM will +not be freed until both the parent (original) process and its child have +put their references to the VM's file descriptor. + +Because a VM's resources are not freed until the last reference to its +file descriptor is released, creating additional references to a VM via +via fork(), dup(), etc... without careful consideration is strongly +discouraged and may have unwanted side effects, e.g. memory allocated +by and on behalf of the VM's process may not be freed/unaccounted when +the VM is shut down. + + 3. Extensions ------------- -- cgit From e2788c4a41cb5fa68096f5a58bccacec1a700295 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 28 Mar 2019 17:22:31 +0100 Subject: Documentation: kvm: clarify KVM_SET_USER_MEMORY_REGION The documentation does not mention how to delete a slot, add the information. Reported-by: Nathaniel McCallum Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index f39969d0121e..67068c47c591 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1114,14 +1114,12 @@ struct kvm_userspace_memory_region { #define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) #define KVM_MEM_READONLY (1UL << 1) -This ioctl allows the user to create or modify a guest physical memory -slot. When changing an existing slot, it may be moved in the guest -physical memory space, or its flags may be modified. It may not be -resized. Slots may not overlap in guest physical address space. -Bits 0-15 of "slot" specifies the slot id and this value should be -less than the maximum number of user memory slots supported per VM. -The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS, -if this capability is supported by the architecture. +This ioctl allows the user to create, modify or delete a guest physical +memory slot. Bits 0-15 of "slot" specify the slot id and this value +should be less than the maximum number of user memory slots supported per +VM. The maximum allowed slots can be queried using KVM_CAP_NR_MEMSLOTS, +if this capability is supported by the architecture. Slots may not +overlap in guest physical address space. If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot" specifies the address space which is being modified. They must be @@ -1130,6 +1128,10 @@ KVM_CAP_MULTI_ADDRESS_SPACE capability. Slots in separate address spaces are unrelated; the restriction on overlapping slots only applies within each address space. +Deleting a slot is done by passing zero for memory_size. When changing +an existing slot, it may be moved in the guest physical memory space, +or its flags may be modified, but it may not be resized. + Memory for the region is taken starting at the address denoted by the field userspace_addr, which must point at user addressable memory for the entire memory slot size. Any object may back this memory, including -- cgit From f7299d441a4da8a5088e651ea55023525a793a13 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 28 Mar 2019 14:13:47 +0100 Subject: gpio: of: Fix of_gpiochip_add() error path If the call to of_gpiochip_scan_gpios() in of_gpiochip_add() fails, no error handling is performed. This lead to the need of callers to call of_gpiochip_remove() on failure, which causes "BAD of_node_put() on ..." if the failure happened before the call to of_node_get(). Fix this by adding proper error handling. Note that calling gpiochip_remove_pin_ranges() multiple times causes no harm: subsequent calls are a no-op. Fixes: dfbd379ba9b7431e ("gpio: of: Return error if gpio hog configuration failed") Signed-off-by: Geert Uytterhoeven Reviewed-by: Mukesh Ojha Signed-off-by: Linus Walleij --- drivers/gpio/gpiolib-of.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 0220dd6d64ed..6a3ec575a404 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -718,7 +718,13 @@ int of_gpiochip_add(struct gpio_chip *chip) of_node_get(chip->of_node); - return of_gpiochip_scan_gpios(chip); + status = of_gpiochip_scan_gpios(chip); + if (status) { + of_node_put(chip->of_node); + gpiochip_remove_pin_ranges(chip); + } + + return status; } void of_gpiochip_remove(struct gpio_chip *chip) -- cgit From 1aa176ef5a451adc0546d5aaa3fb107975c786b7 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Wed, 27 Mar 2019 17:21:42 -0700 Subject: Yama: mark local symbols as static sparse complains that Yama defines functions and a variable as non-static even though they don't exist in any header. Fix it by making them static. Co-developed-by: Mukesh Ojha Signed-off-by: Mukesh Ojha Signed-off-by: Jann Horn [kees: merged similar static-ness fixes into a single patch] Link: https://lkml.kernel.org/r/20190326230841.87834-1-jannh@google.com Link: https://lkml.kernel.org/r/1553673018-19234-1-git-send-email-mojha@codeaurora.org Signed-off-by: Kees Cook Signed-off-by: James Morris --- security/yama/yama_lsm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index 57cc60722dd3..efac68556b45 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -206,7 +206,7 @@ static void yama_ptracer_del(struct task_struct *tracer, * yama_task_free - check for task_pid to remove from exception list * @task: task being removed */ -void yama_task_free(struct task_struct *task) +static void yama_task_free(struct task_struct *task) { yama_ptracer_del(task, task); } @@ -222,7 +222,7 @@ void yama_task_free(struct task_struct *task) * Return 0 on success, -ve on error. -ENOSYS is returned when Yama * does not handle the given option. */ -int yama_task_prctl(int option, unsigned long arg2, unsigned long arg3, +static int yama_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { int rc = -ENOSYS; @@ -401,7 +401,7 @@ static int yama_ptrace_access_check(struct task_struct *child, * * Returns 0 if following the ptrace is allowed, -ve on error. */ -int yama_ptrace_traceme(struct task_struct *parent) +static int yama_ptrace_traceme(struct task_struct *parent) { int rc = 0; @@ -452,7 +452,7 @@ static int yama_dointvec_minmax(struct ctl_table *table, int write, static int zero; static int max_scope = YAMA_SCOPE_NO_ATTACH; -struct ctl_path yama_sysctl_path[] = { +static struct ctl_path yama_sysctl_path[] = { { .procname = "kernel", }, { .procname = "yama", }, { } -- cgit From ce9fb53c72834646f26ecb2213e40e6876048f87 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Thu, 28 Mar 2019 11:38:06 +0100 Subject: gpio: mockup: use simple_read_from_buffer() in debugfs read callback Calling read() for a single byte read will return 2 currently. Use simple_read_from_buffer() which correctly handles all sizes. Fixes: 2a9e27408e12 ("gpio: mockup: rework debugfs interface") Reviewed-by: Mukesh Ojha Signed-off-by: Bartosz Golaszewski --- drivers/gpio/gpio-mockup.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/gpio/gpio-mockup.c b/drivers/gpio/gpio-mockup.c index 74ba8b1d71d8..b6a4efce7c92 100644 --- a/drivers/gpio/gpio-mockup.c +++ b/drivers/gpio/gpio-mockup.c @@ -204,10 +204,9 @@ static ssize_t gpio_mockup_debugfs_read(struct file *file, struct gpio_mockup_chip *chip; struct seq_file *sfile; struct gpio_chip *gc; - int val, rv, cnt; + int val, cnt; char buf[3]; - if (*ppos != 0) return 0; @@ -219,12 +218,7 @@ static ssize_t gpio_mockup_debugfs_read(struct file *file, val = gpio_mockup_get(gc, priv->offset); cnt = snprintf(buf, sizeof(buf), "%d\n", val); - rv = copy_to_user(usr_buf, buf, cnt); - if (rv) - return rv; - - *ppos += cnt; - return cnt; + return simple_read_from_buffer(usr_buf, size, ppos, buf, cnt); } static ssize_t gpio_mockup_debugfs_write(struct file *file, -- cgit From 988aef9e8b0dd46b55ad08b1522429739e26122d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 15 Mar 2019 08:41:04 +0100 Subject: nvme-tcp: fix an endianess miss-annotation nvme_tcp_end_request just takes the status value and the converts it to little endian as well as shifting for the phase bit. Fixes: 43ce38a6d823 ("nvme-tcp: support C2HData with SUCCESS flag") Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index e7e08889865e..68c49dd67210 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -627,7 +627,7 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, return ret; } -static inline void nvme_tcp_end_request(struct request *rq, __le16 status) +static inline void nvme_tcp_end_request(struct request *rq, u16 status) { union nvme_result res = {}; -- cgit From cc2278c413c3a06a93c23ee8722e4dd3d621de12 Mon Sep 17 00:00:00 2001 From: Martin George Date: Wed, 27 Mar 2019 09:52:56 +0100 Subject: nvme-multipath: relax ANA state check When undergoing state transitions I/O might be requeued, hence we should always call nvme_mpath_set_live() to schedule requeue_work whenever the nvme device is live, independent on whether the old state was live or not. Signed-off-by: Martin George Signed-off-by: Gargi Srinivas Signed-off-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 2839bb70badf..f0716f6ce41f 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -404,15 +404,12 @@ static inline bool nvme_state_is_live(enum nvme_ana_state state) static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc, struct nvme_ns *ns) { - enum nvme_ana_state old; - mutex_lock(&ns->head->lock); - old = ns->ana_state; ns->ana_grpid = le32_to_cpu(desc->grpid); ns->ana_state = desc->state; clear_bit(NVME_NS_ANA_PENDING, &ns->flags); - if (nvme_state_is_live(ns->ana_state) && !nvme_state_is_live(old)) + if (nvme_state_is_live(ns->ana_state)) nvme_mpath_set_live(ns); mutex_unlock(&ns->head->lock); } -- cgit From 02db99548d3608a625cf481cff2bb7b626829b3f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Wed, 27 Mar 2019 17:07:22 +0800 Subject: nvmet: fix building bvec from sg list There are two mistakes for building bvec from sg list for file backed ns: - use request data length to compute number of io vector, this way doesn't consider sg->offset, and the result may be smaller than required io vectors - bvec->bv_len isn't capped by sg->length This patch fixes this issue by building bvec from sg directly, given the whole IO stack is ready for multi-page bvec. Reported-by: Yi Zhang Fixes: 3a85a5de29ea ("nvme-loop: add a NVMe loopback host driver") Signed-off-by: Ming Lei Signed-off-by: Christoph Hellwig --- drivers/nvme/target/io-cmd-file.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 3e43212d3c1c..bc6ebb51b0bf 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -75,11 +75,11 @@ err: return ret; } -static void nvmet_file_init_bvec(struct bio_vec *bv, struct sg_page_iter *iter) +static void nvmet_file_init_bvec(struct bio_vec *bv, struct scatterlist *sg) { - bv->bv_page = sg_page_iter_page(iter); - bv->bv_offset = iter->sg->offset; - bv->bv_len = PAGE_SIZE - iter->sg->offset; + bv->bv_page = sg_page(sg); + bv->bv_offset = sg->offset; + bv->bv_len = sg->length; } static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos, @@ -128,14 +128,14 @@ static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2) static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags) { - ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE); - struct sg_page_iter sg_pg_iter; + ssize_t nr_bvec = req->sg_cnt; unsigned long bv_cnt = 0; bool is_sync = false; size_t len = 0, total_len = 0; ssize_t ret = 0; loff_t pos; - + int i; + struct scatterlist *sg; if (req->f.mpool_alloc && nr_bvec > NVMET_MAX_MPOOL_BVEC) is_sync = true; @@ -147,8 +147,8 @@ static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags) } memset(&req->f.iocb, 0, sizeof(struct kiocb)); - for_each_sg_page(req->sg, &sg_pg_iter, req->sg_cnt, 0) { - nvmet_file_init_bvec(&req->f.bvec[bv_cnt], &sg_pg_iter); + for_each_sg(req->sg, sg, req->sg_cnt, i) { + nvmet_file_init_bvec(&req->f.bvec[bv_cnt], sg); len += req->f.bvec[bv_cnt].bv_len; total_len += req->f.bvec[bv_cnt].bv_len; bv_cnt++; @@ -225,7 +225,7 @@ static void nvmet_file_submit_buffered_io(struct nvmet_req *req) static void nvmet_file_execute_rw(struct nvmet_req *req) { - ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE); + ssize_t nr_bvec = req->sg_cnt; if (!req->sg_cnt || !nr_bvec) { nvmet_req_complete(req, 0); -- cgit From a536b49785759bf99465fdf6e248d34322123fcd Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 28 Mar 2019 12:54:03 +0200 Subject: nvmet: fix error flow during ns enable In case we fail to enable p2pmem on the current namespace, disable the backing store device before exiting. Cc: Stephen Bates Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 2d73b66e3686..b3e765a95af8 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -509,7 +509,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns) ret = nvmet_p2pmem_ns_enable(ns); if (ret) - goto out_unlock; + goto out_dev_disable; list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) nvmet_p2pmem_ns_add_p2p(ctrl, ns); @@ -550,7 +550,7 @@ out_unlock: out_dev_put: list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid)); - +out_dev_disable: nvmet_ns_dev_disable(ns); goto out_unlock; } -- cgit From c8fa7a807f3c5f946bd92076fbaf7826edb650dc Mon Sep 17 00:00:00 2001 From: Solomon Tan Date: Fri, 22 Mar 2019 13:22:55 +0800 Subject: perf cs-etm: Add missing case value MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The following error was thrown when compiling `tools/perf` using OpenCSD v0.11.1. This patch fixes said error. CC util/intel-pt-decoder/intel-pt-log.o CC util/cs-etm-decoder/cs-etm-decoder.o util/cs-etm-decoder/cs-etm-decoder.c: In function ‘cs_etm_decoder__buffer_range’: util/cs-etm-decoder/cs-etm-decoder.c:370:2: error: enumeration value ‘OCSD_INSTR_WFI_WFE’ not handled in switch [-Werror=switch-enum] switch (elem->last_i_type) { ^~~~~~ CC util/intel-pt-decoder/intel-pt-decoder.o cc1: all warnings being treated as errors Because `OCSD_INSTR_WFI_WFE` case was added only in v0.11.0, the minimum required OpenCSD library version for this patch is no longer v0.10.0. Signed-off-by: Solomon Tan Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Robert Walker Cc: Suzuki K Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20190322052255.GA4809@w-OptiPlex-7050 Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/feature/test-libopencsd.c | 4 ++-- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c index d68eb4fb40cc..2b0e02c38870 100644 --- a/tools/build/feature/test-libopencsd.c +++ b/tools/build/feature/test-libopencsd.c @@ -4,9 +4,9 @@ /* * Check OpenCSD library version is sufficient to provide required features */ -#define OCSD_MIN_VER ((0 << 16) | (10 << 8) | (0)) +#define OCSD_MIN_VER ((0 << 16) | (11 << 8) | (0)) #if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER) -#error "OpenCSD >= 0.10.0 is required" +#error "OpenCSD >= 0.11.0 is required" #endif int main(void) diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index ba4c623cd8de..39fe21e1cf93 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -387,6 +387,7 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder, break; case OCSD_INSTR_ISB: case OCSD_INSTR_DSB_DMB: + case OCSD_INSTR_WFI_WFE: case OCSD_INSTR_OTHER: default: packet->last_instr_taken_branch = false; -- cgit From f3b4e06b3bda759afd042d3d5fa86bea8f1fe278 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 25 Mar 2019 15:51:35 +0200 Subject: perf intel-pt: Fix TSC slip A TSC packet can slip past MTC packets so that the timestamp appears to go backwards. One estimate is that can be up to about 40 CPU cycles, which is certainly less than 0x1000 TSC ticks, but accept slippage an order of magnitude more to be on the safe side. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Cc: stable@vger.kernel.org Fixes: 79b58424b821c ("perf tools: Add Intel PT support for decoding MTC packets") Link: http://lkml.kernel.org/r/20190325135135.18348-1-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c index 6e03db142091..872fab163585 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -251,19 +251,15 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params) if (!(decoder->tsc_ctc_ratio_n % decoder->tsc_ctc_ratio_d)) decoder->tsc_ctc_mult = decoder->tsc_ctc_ratio_n / decoder->tsc_ctc_ratio_d; - - /* - * Allow for timestamps appearing to backwards because a TSC - * packet has slipped past a MTC packet, so allow 2 MTC ticks - * or ... - */ - decoder->tsc_slip = multdiv(2 << decoder->mtc_shift, - decoder->tsc_ctc_ratio_n, - decoder->tsc_ctc_ratio_d); } - /* ... or 0x100 paranoia */ - if (decoder->tsc_slip < 0x100) - decoder->tsc_slip = 0x100; + + /* + * A TSC packet can slip past MTC packets so that the timestamp appears + * to go backwards. One estimate is that can be up to about 40 CPU + * cycles, which is certainly less than 0x1000 TSC ticks, but accept + * slippage an order of magnitude more to be on the safe side. + */ + decoder->tsc_slip = 0x10000; intel_pt_log("timestamp: mtc_shift %u\n", decoder->mtc_shift); intel_pt_log("timestamp: tsc_ctc_ratio_n %u\n", decoder->tsc_ctc_ratio_n); -- cgit From 4e8a5c1551370ebc0fbdb8f5c33dad13e45bdc99 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 14 Mar 2019 15:00:10 +0100 Subject: perf evsel: Fix max perf_event_attr.precise_ip detection After a discussion with Andi, move the perf_event_attr.precise_ip detection for maximum precise config (via :P modifier or for default cycles event) to perf_evsel__open(). The current detection in perf_event_attr__set_max_precise_ip() is tricky, because precise_ip config is specific for given event and it currently checks only hw cycles. We now check for valid precise_ip value right after failing sys_perf_event_open() for specific event, before any of the perf_event_attr fallback code gets executed. This way we get the proper config in perf_event_attr together with allowed precise_ip settings. We can see that code activity with -vv, like: $ perf record -vv ls ... ------------------------------------------------------------ perf_event_attr: size 112 { sample_period, sample_freq } 4000 ... precise_ip 3 sample_id_all 1 exclude_guest 1 mmap2 1 comm_exec 1 ksymbol 1 ------------------------------------------------------------ sys_perf_event_open: pid 9926 cpu 0 group_fd -1 flags 0x8 sys_perf_event_open failed, error -95 decreasing precise_ip by one (2) ------------------------------------------------------------ perf_event_attr: size 112 { sample_period, sample_freq } 4000 ... precise_ip 2 sample_id_all 1 exclude_guest 1 mmap2 1 comm_exec 1 ksymbol 1 ------------------------------------------------------------ sys_perf_event_open: pid 9926 cpu 0 group_fd -1 flags 0x8 = 4 ... Suggested-by: Andi Kleen Signed-off-by: Jiri Olsa Link: http://lkml.kernel.org/n/tip-dkvxxbeg7lu74155d4jhlmc9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 29 ------------------- tools/perf/util/evlist.h | 2 -- tools/perf/util/evsel.c | 72 +++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 59 insertions(+), 44 deletions(-) diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index ec78e93085de..6689378ee577 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -231,35 +231,6 @@ void perf_evlist__set_leader(struct perf_evlist *evlist) } } -void perf_event_attr__set_max_precise_ip(struct perf_event_attr *pattr) -{ - struct perf_event_attr attr = { - .type = PERF_TYPE_HARDWARE, - .config = PERF_COUNT_HW_CPU_CYCLES, - .exclude_kernel = 1, - .precise_ip = 3, - }; - - event_attr_init(&attr); - - /* - * Unnamed union member, not supported as struct member named - * initializer in older compilers such as gcc 4.4.7 - */ - attr.sample_period = 1; - - while (attr.precise_ip != 0) { - int fd = sys_perf_event_open(&attr, 0, -1, -1, 0); - if (fd != -1) { - close(fd); - break; - } - --attr.precise_ip; - } - - pattr->precise_ip = attr.precise_ip; -} - int __perf_evlist__add_default(struct perf_evlist *evlist, bool precise) { struct perf_evsel *evsel = perf_evsel__new_cycles(precise); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index dcb68f34d2cd..6a94785b9100 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -315,8 +315,6 @@ void perf_evlist__to_front(struct perf_evlist *evlist, void perf_evlist__set_tracking_event(struct perf_evlist *evlist, struct perf_evsel *tracking_evsel); -void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr); - struct perf_evsel * perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 7835e05f0c0a..66d066f18b5b 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -295,7 +295,6 @@ struct perf_evsel *perf_evsel__new_cycles(bool precise) if (!precise) goto new_event; - perf_event_attr__set_max_precise_ip(&attr); /* * Now let the usual logic to set up the perf_event_attr defaults * to kick in when we return and before perf_evsel__open() is called. @@ -305,6 +304,8 @@ new_event: if (evsel == NULL) goto out; + evsel->precise_max = true; + /* use asprintf() because free(evsel) assumes name is allocated */ if (asprintf(&evsel->name, "cycles%s%s%.*s", (attr.precise_ip || attr.exclude_kernel) ? ":" : "", @@ -1083,7 +1084,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, } if (evsel->precise_max) - perf_event_attr__set_max_precise_ip(attr); + attr->precise_ip = 3; if (opts->all_user) { attr->exclude_kernel = 1; @@ -1749,6 +1750,59 @@ static bool ignore_missing_thread(struct perf_evsel *evsel, return true; } +static void display_attr(struct perf_event_attr *attr) +{ + if (verbose >= 2) { + fprintf(stderr, "%.60s\n", graph_dotted_line); + fprintf(stderr, "perf_event_attr:\n"); + perf_event_attr__fprintf(stderr, attr, __open_attr__fprintf, NULL); + fprintf(stderr, "%.60s\n", graph_dotted_line); + } +} + +static int perf_event_open(struct perf_evsel *evsel, + pid_t pid, int cpu, int group_fd, + unsigned long flags) +{ + int precise_ip = evsel->attr.precise_ip; + int fd; + + while (1) { + pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", + pid, cpu, group_fd, flags); + + fd = sys_perf_event_open(&evsel->attr, pid, cpu, group_fd, flags); + if (fd >= 0) + break; + + /* + * Do quick precise_ip fallback if: + * - there is precise_ip set in perf_event_attr + * - maximum precise is requested + * - sys_perf_event_open failed with ENOTSUP error, + * which is associated with wrong precise_ip + */ + if (!precise_ip || !evsel->precise_max || (errno != ENOTSUP)) + break; + + /* + * We tried all the precise_ip values, and it's + * still failing, so leave it to standard fallback. + */ + if (!evsel->attr.precise_ip) { + evsel->attr.precise_ip = precise_ip; + break; + } + + pr_debug2("\nsys_perf_event_open failed, error %d\n", -ENOTSUP); + evsel->attr.precise_ip--; + pr_debug2("decreasing precise_ip by one (%d)\n", evsel->attr.precise_ip); + display_attr(&evsel->attr); + } + + return fd; +} + int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, struct thread_map *threads) { @@ -1824,12 +1878,7 @@ retry_sample_id: if (perf_missing_features.sample_id_all) evsel->attr.sample_id_all = 0; - if (verbose >= 2) { - fprintf(stderr, "%.60s\n", graph_dotted_line); - fprintf(stderr, "perf_event_attr:\n"); - perf_event_attr__fprintf(stderr, &evsel->attr, __open_attr__fprintf, NULL); - fprintf(stderr, "%.60s\n", graph_dotted_line); - } + display_attr(&evsel->attr); for (cpu = 0; cpu < cpus->nr; cpu++) { @@ -1841,13 +1890,10 @@ retry_sample_id: group_fd = get_group_fd(evsel, cpu, thread); retry_open: - pr_debug2("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", - pid, cpus->map[cpu], group_fd, flags); - test_attr__ready(); - fd = sys_perf_event_open(&evsel->attr, pid, cpus->map[cpu], - group_fd, flags); + fd = perf_event_open(evsel, pid, cpus->map[cpu], + group_fd, flags); FD(evsel, cpu, thread) = fd; -- cgit From be709d48329a500621d2a05835283150ae137b45 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 25 Mar 2019 14:06:07 -0300 Subject: tools headers uapi: Sync asm-generic/mman-common.h and linux/mman.h To deal with the move of some defines from asm-generic/mmap-common.h to linux/mman.h done in: 746c9398f5ac ("arch: move common mmap flags to linux/mman.h") The generated mmap_flags array stays the same: $ tools/perf/trace/beauty/mmap_flags.sh static const char *mmap_flags[] = { [ilog2(0x40) + 1] = "32BIT", [ilog2(0x01) + 1] = "SHARED", [ilog2(0x02) + 1] = "PRIVATE", [ilog2(0x10) + 1] = "FIXED", [ilog2(0x20) + 1] = "ANONYMOUS", [ilog2(0x100000) + 1] = "FIXED_NOREPLACE", [ilog2(0x0100) + 1] = "GROWSDOWN", [ilog2(0x0800) + 1] = "DENYWRITE", [ilog2(0x1000) + 1] = "EXECUTABLE", [ilog2(0x2000) + 1] = "LOCKED", [ilog2(0x4000) + 1] = "NORESERVE", [ilog2(0x8000) + 1] = "POPULATE", [ilog2(0x10000) + 1] = "NONBLOCK", [ilog2(0x20000) + 1] = "STACK", [ilog2(0x40000) + 1] = "HUGETLB", [ilog2(0x80000) + 1] = "SYNC", }; $ And to have the system's sys/mman.h find the definition of MAP_SHARED and MAP_PRIVATE, make sure they are defined in the tools/ mman-common.h in a way that keeps it the same as the kernel's, need for keeping the Android's NDK cross build working. This silences these perf build warnings: Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/mman-common.h' differs from latest version at 'include/uapi/asm-generic/mman-common.h' diff -u tools/include/uapi/asm-generic/mman-common.h include/uapi/asm-generic/mman-common.h Warning: Kernel ABI header at 'tools/include/uapi/linux/mman.h' differs from latest version at 'include/uapi/linux/mman.h' diff -u tools/include/uapi/linux/mman.h include/uapi/linux/mman.h Cc: Adrian Hunter Cc: Arnd Bergmann Cc: Jiri Olsa Cc: Michael S. Tsirkin Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-h80ycpc6pedg9s5z2rwpy6ws@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/alpha/include/uapi/asm/mman.h | 2 -- tools/arch/mips/include/uapi/asm/mman.h | 2 -- tools/arch/parisc/include/uapi/asm/mman.h | 2 -- tools/arch/xtensa/include/uapi/asm/mman.h | 2 -- tools/include/uapi/asm-generic/mman-common-tools.h | 23 ++++++++++++++++++++++ tools/include/uapi/asm-generic/mman-common.h | 4 +--- tools/include/uapi/asm-generic/mman.h | 2 +- tools/include/uapi/linux/mman.h | 4 ++++ tools/perf/Makefile.perf | 4 ++-- tools/perf/check-headers.sh | 2 +- tools/perf/trace/beauty/mmap_flags.sh | 14 ++++++++++--- 11 files changed, 43 insertions(+), 18 deletions(-) create mode 100644 tools/include/uapi/asm-generic/mman-common-tools.h diff --git a/tools/arch/alpha/include/uapi/asm/mman.h b/tools/arch/alpha/include/uapi/asm/mman.h index c317d3e6867a..ea6a255ae61f 100644 --- a/tools/arch/alpha/include/uapi/asm/mman.h +++ b/tools/arch/alpha/include/uapi/asm/mman.h @@ -27,8 +27,6 @@ #define MAP_NONBLOCK 0x40000 #define MAP_NORESERVE 0x10000 #define MAP_POPULATE 0x20000 -#define MAP_PRIVATE 0x02 -#define MAP_SHARED 0x01 #define MAP_STACK 0x80000 #define PROT_EXEC 0x4 #define PROT_GROWSDOWN 0x01000000 diff --git a/tools/arch/mips/include/uapi/asm/mman.h b/tools/arch/mips/include/uapi/asm/mman.h index de2206883abc..c8acaa138d46 100644 --- a/tools/arch/mips/include/uapi/asm/mman.h +++ b/tools/arch/mips/include/uapi/asm/mman.h @@ -28,8 +28,6 @@ #define MAP_NONBLOCK 0x20000 #define MAP_NORESERVE 0x0400 #define MAP_POPULATE 0x10000 -#define MAP_PRIVATE 0x002 -#define MAP_SHARED 0x001 #define MAP_STACK 0x40000 #define PROT_EXEC 0x04 #define PROT_GROWSDOWN 0x01000000 diff --git a/tools/arch/parisc/include/uapi/asm/mman.h b/tools/arch/parisc/include/uapi/asm/mman.h index 1bd78758bde9..f9fd1325f5bd 100644 --- a/tools/arch/parisc/include/uapi/asm/mman.h +++ b/tools/arch/parisc/include/uapi/asm/mman.h @@ -27,8 +27,6 @@ #define MAP_NONBLOCK 0x20000 #define MAP_NORESERVE 0x4000 #define MAP_POPULATE 0x10000 -#define MAP_PRIVATE 0x02 -#define MAP_SHARED 0x01 #define MAP_STACK 0x40000 #define PROT_EXEC 0x4 #define PROT_GROWSDOWN 0x01000000 diff --git a/tools/arch/xtensa/include/uapi/asm/mman.h b/tools/arch/xtensa/include/uapi/asm/mman.h index 34dde6f44dae..f2b08c990afc 100644 --- a/tools/arch/xtensa/include/uapi/asm/mman.h +++ b/tools/arch/xtensa/include/uapi/asm/mman.h @@ -27,8 +27,6 @@ #define MAP_NONBLOCK 0x20000 #define MAP_NORESERVE 0x0400 #define MAP_POPULATE 0x10000 -#define MAP_PRIVATE 0x002 -#define MAP_SHARED 0x001 #define MAP_STACK 0x40000 #define PROT_EXEC 0x4 #define PROT_GROWSDOWN 0x01000000 diff --git a/tools/include/uapi/asm-generic/mman-common-tools.h b/tools/include/uapi/asm-generic/mman-common-tools.h new file mode 100644 index 000000000000..af7d0d3a3182 --- /dev/null +++ b/tools/include/uapi/asm-generic/mman-common-tools.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __ASM_GENERIC_MMAN_COMMON_TOOLS_ONLY_H +#define __ASM_GENERIC_MMAN_COMMON_TOOLS_ONLY_H + +#include + +/* We need this because we need to have tools/include/uapi/ included in the tools + * header search path to get access to stuff that is not yet in the system's + * copy of the files in that directory, but since this cset: + * + * 746c9398f5ac ("arch: move common mmap flags to linux/mman.h") + * + * We end up making sys/mman.h, that is in the system headers, to not find the + * MAP_SHARED and MAP_PRIVATE defines because they are not anymore in our copy + * of asm-generic/mman-common.h. So we define them here and include this header + * from each of the per arch mman.h headers. + */ +#ifndef MAP_SHARED +#define MAP_SHARED 0x01 /* Share changes */ +#define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ +#endif +#endif // __ASM_GENERIC_MMAN_COMMON_TOOLS_ONLY_H diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index e7ee32861d51..abd238d0f7a4 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -15,9 +15,7 @@ #define PROT_GROWSDOWN 0x01000000 /* mprotect flag: extend change to start of growsdown vma */ #define PROT_GROWSUP 0x02000000 /* mprotect flag: extend change to end of growsup vma */ -#define MAP_SHARED 0x01 /* Share changes */ -#define MAP_PRIVATE 0x02 /* Changes are private */ -#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ +/* 0x01 - 0x03 are defined in linux/mman.h */ #define MAP_TYPE 0x0f /* Mask for type of mapping */ #define MAP_FIXED 0x10 /* Interpret addr exactly */ #define MAP_ANONYMOUS 0x20 /* don't use a file */ diff --git a/tools/include/uapi/asm-generic/mman.h b/tools/include/uapi/asm-generic/mman.h index 653687d9771b..36c197fc44a0 100644 --- a/tools/include/uapi/asm-generic/mman.h +++ b/tools/include/uapi/asm-generic/mman.h @@ -2,7 +2,7 @@ #ifndef __ASM_GENERIC_MMAN_H #define __ASM_GENERIC_MMAN_H -#include +#include #define MAP_GROWSDOWN 0x0100 /* stack-like segment */ #define MAP_DENYWRITE 0x0800 /* ETXTBSY */ diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h index d0f515d53299..fc1a64c3447b 100644 --- a/tools/include/uapi/linux/mman.h +++ b/tools/include/uapi/linux/mman.h @@ -12,6 +12,10 @@ #define OVERCOMMIT_ALWAYS 1 #define OVERCOMMIT_NEVER 2 +#define MAP_SHARED 0x01 /* Share changes */ +#define MAP_PRIVATE 0x02 /* Changes are private */ +#define MAP_SHARED_VALIDATE 0x03 /* share + validate extension flags */ + /* * Huge page size encoding when MAP_HUGETLB is specified, and a huge page * size other than the default is desired. See hugetlb_encode.h. diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 01f7555fd933..e8c9f77e9010 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -481,8 +481,8 @@ $(madvise_behavior_array): $(madvise_hdr_dir)/mman-common.h $(madvise_behavior_t mmap_flags_array := $(beauty_outdir)/mmap_flags_array.c mmap_flags_tbl := $(srctree)/tools/perf/trace/beauty/mmap_flags.sh -$(mmap_flags_array): $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(mmap_flags_tbl) - $(Q)$(SHELL) '$(mmap_flags_tbl)' $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@ +$(mmap_flags_array): $(linux_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman.h $(asm_generic_uapi_dir)/mman-common.h $(mmap_flags_tbl) + $(Q)$(SHELL) '$(mmap_flags_tbl)' $(linux_uapi_dir) $(asm_generic_uapi_dir) $(arch_asm_uapi_dir) > $@ mount_flags_array := $(beauty_outdir)/mount_flags_array.c mount_flags_tbl := $(srctree)/tools/perf/trace/beauty/mount_flags.sh diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 7b55613924de..c68ee06cae63 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -103,7 +103,7 @@ done # diff with extra ignore lines check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include "' check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include "' -check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common.h>"' +check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"' check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"' # diff non-symmetric files diff --git a/tools/perf/trace/beauty/mmap_flags.sh b/tools/perf/trace/beauty/mmap_flags.sh index 32bac9c0d694..5f5eefcb3c74 100755 --- a/tools/perf/trace/beauty/mmap_flags.sh +++ b/tools/perf/trace/beauty/mmap_flags.sh @@ -1,15 +1,18 @@ #!/bin/sh # SPDX-License-Identifier: LGPL-2.1 -if [ $# -ne 2 ] ; then +if [ $# -ne 3 ] ; then [ $# -eq 1 ] && hostarch=$1 || hostarch=`uname -m | sed -e s/i.86/x86/ -e s/x86_64/x86/` + linux_header_dir=tools/include/uapi/linux header_dir=tools/include/uapi/asm-generic arch_header_dir=tools/arch/${hostarch}/include/uapi/asm else - header_dir=$1 - arch_header_dir=$2 + linux_header_dir=$1 + header_dir=$2 + arch_header_dir=$3 fi +linux_mman=${linux_header_dir}/mman.h arch_mman=${arch_header_dir}/mman.h # those in egrep -vw are flags, we want just the bits @@ -20,6 +23,11 @@ egrep -q $regex ${arch_mman} && \ (egrep $regex ${arch_mman} | \ sed -r "s/$regex/\2 \1/g" | \ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n") +egrep -q $regex ${linux_mman} && \ +(egrep $regex ${linux_mman} | \ + egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \ + sed -r "s/$regex/\2 \1/g" | \ + xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n") ([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+ Date: Mon, 25 Mar 2019 14:22:47 -0300 Subject: tools headers uapi: Sync linux/fcntl.h to get the F_SEAL_FUTURE_WRITE addition To get the changes in: ab3948f58ff8 ("mm/memfd: add an F_SEAL_FUTURE_WRITE seal to memfd") And silence this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/fcntl.h' differs from latest version at 'include/uapi/linux/fcntl.h' diff -u tools/include/uapi/linux/fcntl.h include/uapi/linux/fcntl.h Cc: Adrian Hunter Cc: Jiri Olsa Cc: Joel Fernandes (Google) Cc: Linus Torvalds Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-lvfx5cgf0xzmdi9mcjva1ttl@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/fcntl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h index 6448cdd9a350..a2f8658f1c55 100644 --- a/tools/include/uapi/linux/fcntl.h +++ b/tools/include/uapi/linux/fcntl.h @@ -41,6 +41,7 @@ #define F_SEAL_SHRINK 0x0002 /* prevent file from shrinking */ #define F_SEAL_GROW 0x0004 /* prevent file from growing */ #define F_SEAL_WRITE 0x0008 /* prevent writes */ +#define F_SEAL_FUTURE_WRITE 0x0010 /* prevent future writes while mapped */ /* (1U << 31) is reserved for signed error codes */ /* -- cgit From 949af89af02c2d66db973c5bca01b7858e1ce0ba Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 25 Mar 2019 14:25:33 -0300 Subject: tools arch x86: Sync asm/cpufeatures.h with the kernel sources To get the changes from: 52f64909409c ("x86: Add TSX Force Abort CPUID/MSR") That don't cause any changes in the generated perf binaries. And silence this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra (Intel) Cc: Thomas Gleixner Link: https://lkml.kernel.org/n/tip-zv8kw8vnb1zppflncpwfsv2w@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 6d6122524711..981ff9479648 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -344,6 +344,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ #define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ -- cgit From 82392516e9e0818cd2227cf9a16205c90a6cacfa Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 25 Mar 2019 14:28:20 -0300 Subject: tools headers uapi: Update drm/i915_drm.h To get the changes in: e46c2e99f600 ("drm/i915: Expose RPCS (SSEU) configuration to userspace (Gen11 only)") That don't cause changes in the generated perf binaries. To silence this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/drm/i915_drm.h' differs from latest version at 'include/uapi/drm/i915_drm.h' diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Tvrtko Ursulin Link: https://lkml.kernel.org/n/tip-h6bspm1nomjnpr90333rrx7q@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/i915_drm.h | 64 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 298b2e197744..397810fa2d33 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -1486,9 +1486,73 @@ struct drm_i915_gem_context_param { #define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */ #define I915_CONTEXT_DEFAULT_PRIORITY 0 #define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */ + /* + * When using the following param, value should be a pointer to + * drm_i915_gem_context_param_sseu. + */ +#define I915_CONTEXT_PARAM_SSEU 0x7 __u64 value; }; +/** + * Context SSEU programming + * + * It may be necessary for either functional or performance reason to configure + * a context to run with a reduced number of SSEU (where SSEU stands for Slice/ + * Sub-slice/EU). + * + * This is done by configuring SSEU configuration using the below + * @struct drm_i915_gem_context_param_sseu for every supported engine which + * userspace intends to use. + * + * Not all GPUs or engines support this functionality in which case an error + * code -ENODEV will be returned. + * + * Also, flexibility of possible SSEU configuration permutations varies between + * GPU generations and software imposed limitations. Requesting such a + * combination will return an error code of -EINVAL. + * + * NOTE: When perf/OA is active the context's SSEU configuration is ignored in + * favour of a single global setting. + */ +struct drm_i915_gem_context_param_sseu { + /* + * Engine class & instance to be configured or queried. + */ + __u16 engine_class; + __u16 engine_instance; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 flags; + + /* + * Mask of slices to enable for the context. Valid values are a subset + * of the bitmask value returned for I915_PARAM_SLICE_MASK. + */ + __u64 slice_mask; + + /* + * Mask of subslices to enable for the context. Valid values are a + * subset of the bitmask value return by I915_PARAM_SUBSLICE_MASK. + */ + __u64 subslice_mask; + + /* + * Minimum/Maximum number of EUs to enable per subslice for the + * context. min_eus_per_subslice must be inferior or equal to + * max_eus_per_subslice. + */ + __u16 min_eus_per_subslice; + __u16 max_eus_per_subslice; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 rsvd; +}; + enum drm_i915_oa_format { I915_OA_FORMAT_A13 = 1, /* HSW only */ I915_OA_FORMAT_A29, /* HSW only */ -- cgit From 8142bd82a59e452fefea7b21113101d6a87d9fa8 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 25 Mar 2019 11:34:04 -0300 Subject: tools headers: Update x86's syscall_64.tbl and uapi/asm-generic/unistd To pick up the changes introduced in the following csets: 2b188cc1bb85 ("Add io_uring IO interface") edafccee56ff ("io_uring: add support for pre-mapped user IO buffers") 3eb39f47934f ("signal: add pidfd_send_signal() syscall") This makes 'perf trace' to become aware of these new syscalls, so that one can use them like 'perf trace -e ui_uring*,*signal' to do a system wide strace-like session looking at those syscalls, for instance. For example: # perf trace -s io_uring-cp ~acme/isos/RHEL-x86_64-dvd1.iso ~/bla Summary of events: io_uring-cp (383), 1208866 events, 100.0% syscall calls total min avg max stddev (msec) (msec) (msec) (msec) (%) -------------- ------ -------- ------ ------- ------- ------ io_uring_enter 605780 2955.615 0.000 0.005 33.804 1.94% openat 4 459.446 0.004 114.861 459.435 100.00% munmap 4 0.073 0.009 0.018 0.042 44.03% mmap 10 0.054 0.002 0.005 0.026 43.24% brk 28 0.038 0.001 0.001 0.003 7.51% io_uring_setup 1 0.030 0.030 0.030 0.030 0.00% mprotect 4 0.014 0.002 0.004 0.005 14.32% close 5 0.012 0.001 0.002 0.004 28.87% fstat 3 0.006 0.001 0.002 0.003 35.83% read 4 0.004 0.001 0.001 0.002 13.58% access 1 0.003 0.003 0.003 0.003 0.00% lseek 3 0.002 0.001 0.001 0.001 9.00% arch_prctl 2 0.002 0.001 0.001 0.001 0.69% execve 1 0.000 0.000 0.000 0.000 0.00% # # perf trace -e io_uring* -s io_uring-cp ~acme/isos/RHEL-x86_64-dvd1.iso ~/bla Summary of events: io_uring-cp (390), 1191250 events, 100.0% syscall calls total min avg max stddev (msec) (msec) (msec) (msec) (%) -------------- ------ -------- ------ ------ ------ ------ io_uring_enter 597093 2706.060 0.001 0.005 14.761 1.10% io_uring_setup 1 0.038 0.038 0.038 0.038 0.00% # More work needed to make the tools/perf/examples/bpf/augmented_raw_syscalls.c BPF program to copy the 'struct io_uring_params' arguments to perf's ring buffer so that 'perf trace' can use the BTF info put in place by pahole's conversion of the kernel DWARF and then auto-beautify those arguments. This patch produces the expected change in the generated syscalls table for x86_64: --- /tmp/build/perf/arch/x86/include/generated/asm/syscalls_64.c.before 2019-03-26 13:37:46.679057774 -0300 +++ /tmp/build/perf/arch/x86/include/generated/asm/syscalls_64.c 2019-03-26 13:38:12.755990383 -0300 @@ -334,5 +334,9 @@ static const char *syscalltbl_x86_64[] = [332] = "statx", [333] = "io_pgetevents", [334] = "rseq", + [424] = "pidfd_send_signal", + [425] = "io_uring_setup", + [426] = "io_uring_enter", + [427] = "io_uring_register", }; -#define SYSCALLTBL_x86_64_MAX_ID 334 +#define SYSCALLTBL_x86_64_MAX_ID 427 This silences these perf build warnings: Warning: Kernel ABI header at 'tools/include/uapi/asm-generic/unistd.h' differs from latest version at 'include/uapi/asm-generic/unistd.h' diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl' diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl Cc: Adrian Hunter Cc: Andrii Nakryiko Cc: Christian Brauner Cc: Daniel Borkmann Cc: Jens Axboe Cc: Jiri Olsa Cc: Martin KaFai Lau Cc: Namhyung Kim Cc: Song Liu Cc: Yonghong Song Link: https://lkml.kernel.org/n/tip-p0ars3otuc52x5iznf21shhw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/unistd.h | 11 ++++++++++- tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 4 ++++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 12cdf611d217..dee7292e1df6 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -824,8 +824,17 @@ __SYSCALL(__NR_futex_time64, sys_futex) __SYSCALL(__NR_sched_rr_get_interval_time64, sys_sched_rr_get_interval) #endif +#define __NR_pidfd_send_signal 424 +__SYSCALL(__NR_pidfd_send_signal, sys_pidfd_send_signal) +#define __NR_io_uring_setup 425 +__SYSCALL(__NR_io_uring_setup, sys_io_uring_setup) +#define __NR_io_uring_enter 426 +__SYSCALL(__NR_io_uring_enter, sys_io_uring_enter) +#define __NR_io_uring_register 427 +__SYSCALL(__NR_io_uring_register, sys_io_uring_register) + #undef __NR_syscalls -#define __NR_syscalls 424 +#define __NR_syscalls 428 /* * 32 bit systems traditionally used different diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 2ae92fddb6d5..92ee0b4378d4 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -345,6 +345,10 @@ 334 common rseq __x64_sys_rseq # don't use numbers 387 through 423, add new calls after the last # 'common' entry +424 common pidfd_send_signal __x64_sys_pidfd_send_signal +425 common io_uring_setup __x64_sys_io_uring_setup +426 common io_uring_enter __x64_sys_io_uring_enter +427 common io_uring_register __x64_sys_io_uring_register # # x32-specific system call numbers start at 512 to avoid cache impact -- cgit From 707c373c846cf6e27a47a2c093d243a35c691b62 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 26 Mar 2019 13:45:58 -0300 Subject: tools headers uapi: Sync powerpc's asm/kvm.h copy with the kernel sources To pick up the changes in: 2b57ecd0208f ("KVM: PPC: Book3S: Add count cache flush parameters to kvmppc_get_cpu_char()") That don't cause any changes in the tools. This silences this perf build warning: Warning: Kernel ABI header at 'tools/arch/powerpc/include/uapi/asm/kvm.h' differs from latest version at 'arch/powerpc/include/uapi/asm/kvm.h' diff -u tools/arch/powerpc/include/uapi/asm/kvm.h arch/powerpc/include/uapi/asm/kvm.h Cc: Adrian Hunter Cc: Jiri Olsa Cc: Namhyung Kim Cc: Paul Mackerras Cc: Suraj Jitindar Singh Link: https://lkml.kernel.org/n/tip-4pb7ywp9536hub2pnj4hu6i4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/powerpc/include/uapi/asm/kvm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 8c876c166ef2..26ca425f4c2c 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -463,10 +463,12 @@ struct kvm_ppc_cpu_char { #define KVM_PPC_CPU_CHAR_BR_HINT_HONOURED (1ULL << 58) #define KVM_PPC_CPU_CHAR_MTTRIG_THR_RECONF (1ULL << 57) #define KVM_PPC_CPU_CHAR_COUNT_CACHE_DIS (1ULL << 56) +#define KVM_PPC_CPU_CHAR_BCCTR_FLUSH_ASSIST (1ull << 54) #define KVM_PPC_CPU_BEHAV_FAVOUR_SECURITY (1ULL << 63) #define KVM_PPC_CPU_BEHAV_L1D_FLUSH_PR (1ULL << 62) #define KVM_PPC_CPU_BEHAV_BNDS_CHK_SPEC_BAR (1ULL << 61) +#define KVM_PPC_CPU_BEHAV_FLUSH_COUNT_CACHE (1ull << 58) /* Per-vcpu XICS interrupt controller state */ #define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) -- cgit From 977c7a6d1e263ff1d755f28595b99e4bc0c48a9f Mon Sep 17 00:00:00 2001 From: Wei Li Date: Thu, 28 Feb 2019 17:20:03 +0800 Subject: perf machine: Update kernel map address and re-order properly Since commit 1fb87b8e9599 ("perf machine: Don't search for active kernel start in __machine__create_kernel_maps"), the __machine__create_kernel_maps() just create a map what start and end are both zero. Though the address will be updated later, the order of map in the rbtree may be incorrect. The commit ee05d21791db ("perf machine: Set main kernel end address properly") fixed the logic in machine__create_kernel_maps(), but it's still wrong in function machine__process_kernel_mmap_event(). To reproduce this issue, we need an environment which the module address is before the kernel text segment. I tested it on an aarch64 machine with kernel 4.19.25: [root@localhost hulk]# grep _stext /proc/kallsyms ffff000008081000 T _stext [root@localhost hulk]# grep _etext /proc/kallsyms ffff000009780000 R _etext [root@localhost hulk]# tail /proc/modules hisi_sas_v2_hw 77824 0 - Live 0xffff00000191d000 nvme_core 126976 7 nvme, Live 0xffff0000018b6000 mdio 20480 1 ixgbe, Live 0xffff0000018ab000 hisi_sas_main 106496 1 hisi_sas_v2_hw, Live 0xffff000001861000 hns_mdio 20480 2 - Live 0xffff000001822000 hnae 28672 3 hns_dsaf,hns_enet_drv, Live 0xffff000001815000 dm_mirror 40960 0 - Live 0xffff000001804000 dm_region_hash 32768 1 dm_mirror, Live 0xffff0000017f5000 dm_log 32768 2 dm_mirror,dm_region_hash, Live 0xffff0000017e7000 dm_mod 315392 17 dm_mirror,dm_log, Live 0xffff000001780000 [root@localhost hulk]# Before fix: [root@localhost bin]# perf record sleep 3 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.011 MB perf.data (9 samples) ] [root@localhost bin]# perf buildid-list -i perf.data 4c4e46c971ca935f781e603a09b52a92e8bdfee8 [vdso] [root@localhost bin]# perf buildid-list -i perf.data -H 0000000000000000000000000000000000000000 /proc/kcore [root@localhost bin]# After fix: [root@localhost tools]# ./perf/perf record sleep 3 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.011 MB perf.data (9 samples) ] [root@localhost tools]# ./perf/perf buildid-list -i perf.data 28a6c690262896dbd1b5e1011ed81623e6db0610 [kernel.kallsyms] 106c14ce6e4acea3453e484dc604d66666f08a2f [vdso] [root@localhost tools]# ./perf/perf buildid-list -i perf.data -H 28a6c690262896dbd1b5e1011ed81623e6db0610 /proc/kcore Signed-off-by: Wei Li Acked-by: Jiri Olsa Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: David Ahern Cc: Hanjun Guo Cc: Kim Phillips Cc: Li Bin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20190228092003.34071-1-liwei391@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 61959aba7e27..3c520baa198c 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1421,6 +1421,20 @@ static void machine__set_kernel_mmap(struct machine *machine, machine->vmlinux_map->end = ~0ULL; } +static void machine__update_kernel_mmap(struct machine *machine, + u64 start, u64 end) +{ + struct map *map = machine__kernel_map(machine); + + map__get(map); + map_groups__remove(&machine->kmaps, map); + + machine__set_kernel_mmap(machine, start, end); + + map_groups__insert(&machine->kmaps, map); + map__put(map); +} + int machine__create_kernel_maps(struct machine *machine) { struct dso *kernel = machine__get_kernel(machine); @@ -1453,17 +1467,11 @@ int machine__create_kernel_maps(struct machine *machine) goto out_put; } - /* we have a real start address now, so re-order the kmaps */ - map = machine__kernel_map(machine); - - map__get(map); - map_groups__remove(&machine->kmaps, map); - - /* assume it's the last in the kmaps */ - machine__set_kernel_mmap(machine, addr, ~0ULL); - - map_groups__insert(&machine->kmaps, map); - map__put(map); + /* + * we have a real start address now, so re-order the kmaps + * assume it's the last in the kmaps + */ + machine__update_kernel_mmap(machine, addr, ~0ULL); } if (machine__create_extra_kernel_maps(machine, kernel)) @@ -1599,7 +1607,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine, if (strstr(kernel->long_name, "vmlinux")) dso__set_short_name(kernel, "[kernel.vmlinux]", false); - machine__set_kernel_mmap(machine, event->mmap.start, + machine__update_kernel_mmap(machine, event->mmap.start, event->mmap.start + event->mmap.len); /* -- cgit From 8453c936db20489dbf0957187dca9a2656a2a7b6 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 27 Mar 2019 09:28:25 +0200 Subject: perf scripts python: exported-sql-viewer.py: Fix never-ending loop pyside version 1 fails to handle python3 large integers in some cases, resulting in Qt getting into a never-ending loop. This affects: samples Table samples_view Table All branches Report Selected branches Report Add workarounds for those cases. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Fixes: beda0e725e5f ("perf script python: Add Python3 support to exported-sql-viewer.py") Link: http://lkml.kernel.org/r/20190327072826.19168-2-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 60 ++++++++++++++++++++---- 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index e38518cdcbc3..0cf30956064a 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -107,6 +107,7 @@ import os from PySide.QtCore import * from PySide.QtGui import * from PySide.QtSql import * +pyside_version_1 = True from decimal import * from ctypes import * from multiprocessing import Process, Array, Value, Event @@ -1526,6 +1527,19 @@ def BranchDataPrep(query): " (" + dsoname(query.value(15)) + ")") return data +def BranchDataPrepWA(query): + data = [] + data.append(query.value(0)) + # Workaround pyside failing to handle large integers (i.e. time) in python3 by converting to a string + data.append("{:>19}".format(query.value(1))) + for i in xrange(2, 8): + data.append(query.value(i)) + data.append(tohex(query.value(8)).rjust(16) + " " + query.value(9) + offstr(query.value(10)) + + " (" + dsoname(query.value(11)) + ")" + " -> " + + tohex(query.value(12)) + " " + query.value(13) + offstr(query.value(14)) + + " (" + dsoname(query.value(15)) + ")") + return data + # Branch data model class BranchModel(TreeModel): @@ -1553,7 +1567,11 @@ class BranchModel(TreeModel): " AND evsel_id = " + str(self.event_id) + " ORDER BY samples.id" " LIMIT " + str(glb_chunk_sz)) - self.fetcher = SQLFetcher(glb, sql, BranchDataPrep, self.AddSample) + if pyside_version_1 and sys.version_info[0] == 3: + prep = BranchDataPrepWA + else: + prep = BranchDataPrep + self.fetcher = SQLFetcher(glb, sql, prep, self.AddSample) self.fetcher.done.connect(self.Update) self.fetcher.Fetch(glb_chunk_sz) @@ -2079,14 +2097,6 @@ def IsSelectable(db, table, sql = ""): return False return True -# SQL data preparation - -def SQLTableDataPrep(query, count): - data = [] - for i in xrange(count): - data.append(query.value(i)) - return data - # SQL table data model item class SQLTableItem(): @@ -2110,7 +2120,7 @@ class SQLTableModel(TableModel): self.more = True self.populated = 0 self.column_headers = column_headers - self.fetcher = SQLFetcher(glb, sql, lambda x, y=len(column_headers): SQLTableDataPrep(x, y), self.AddSample) + self.fetcher = SQLFetcher(glb, sql, lambda x, y=len(column_headers): self.SQLTableDataPrep(x, y), self.AddSample) self.fetcher.done.connect(self.Update) self.fetcher.Fetch(glb_chunk_sz) @@ -2154,6 +2164,12 @@ class SQLTableModel(TableModel): def columnHeader(self, column): return self.column_headers[column] + def SQLTableDataPrep(self, query, count): + data = [] + for i in xrange(count): + data.append(query.value(i)) + return data + # SQL automatic table data model class SQLAutoTableModel(SQLTableModel): @@ -2182,8 +2198,32 @@ class SQLAutoTableModel(SQLTableModel): QueryExec(query, "SELECT column_name FROM information_schema.columns WHERE table_schema = '" + schema + "' and table_name = '" + select_table_name + "'") while query.next(): column_headers.append(query.value(0)) + if pyside_version_1 and sys.version_info[0] == 3: + if table_name == "samples_view": + self.SQLTableDataPrep = self.samples_view_DataPrep + if table_name == "samples": + self.SQLTableDataPrep = self.samples_DataPrep super(SQLAutoTableModel, self).__init__(glb, sql, column_headers, parent) + def samples_view_DataPrep(self, query, count): + data = [] + data.append(query.value(0)) + # Workaround pyside failing to handle large integers (i.e. time) in python3 by converting to a string + data.append("{:>19}".format(query.value(1))) + for i in xrange(2, count): + data.append(query.value(i)) + return data + + def samples_DataPrep(self, query, count): + data = [] + for i in xrange(9): + data.append(query.value(i)) + # Workaround pyside failing to handle large integers (i.e. time) in python3 by converting to a string + data.append("{:>19}".format(query.value(9))) + for i in xrange(10, count): + data.append(query.value(i)) + return data + # Base class for custom ResizeColumnsToContents class ResizeColumnsToContentsBase(QObject): -- cgit From 606bd60ab6fbcb7f73deeef4fa37cfd5e447a200 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 27 Mar 2019 09:28:26 +0200 Subject: perf scripts python: exported-sql-viewer.py: Fix python3 support Unlike python2, python3 strings are not compatible with byte strings. That results in disassembly not working for the branches reports. Fixup those places overlooked in the port to python3. Signed-off-by: Adrian Hunter Cc: Jiri Olsa Fixes: beda0e725e5f ("perf script python: Add Python3 support to exported-sql-viewer.py") Link: http://lkml.kernel.org/r/20190327072826.19168-3-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/exported-sql-viewer.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tools/perf/scripts/python/exported-sql-viewer.py b/tools/perf/scripts/python/exported-sql-viewer.py index 0cf30956064a..74ef92f1d19a 100755 --- a/tools/perf/scripts/python/exported-sql-viewer.py +++ b/tools/perf/scripts/python/exported-sql-viewer.py @@ -2908,9 +2908,13 @@ class LibXED(): ok = self.xed_format_context(2, inst.xedp, inst.bufferp, sizeof(inst.buffer), ip, 0, 0) if not ok: return 0, "" + if sys.version_info[0] == 2: + result = inst.buffer.value + else: + result = inst.buffer.value.decode() # Return instruction length and the disassembled instruction text # For now, assume the length is in byte 166 - return inst.xedd[166], inst.buffer.value + return inst.xedd[166], result def TryOpen(file_name): try: @@ -2926,9 +2930,14 @@ def Is64Bit(f): header = f.read(7) f.seek(pos) magic = header[0:4] - eclass = ord(header[4]) - encoding = ord(header[5]) - version = ord(header[6]) + if sys.version_info[0] == 2: + eclass = ord(header[4]) + encoding = ord(header[5]) + version = ord(header[6]) + else: + eclass = header[4] + encoding = header[5] + version = header[6] if magic == chr(127) + "ELF" and eclass > 0 and eclass < 3 and encoding > 0 and encoding < 3 and version == 1: result = True if eclass == 2 else False return result -- cgit From e94d6b7f615e6dfbaf9fba7db6011db561461d0c Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 15 Mar 2019 11:00:14 -0700 Subject: perf pmu: Fix parser error for uncore event alias Perf fails to parse uncore event alias, for example: # perf stat -e unc_m_clockticks -a --no-merge sleep 1 event syntax error: 'unc_m_clockticks' \___ parser error Current code assumes that the event alias is from one specific PMU. To find the PMU, perf strcmps the PMU name of event alias with the real PMU name on the system. However, the uncore event alias may be from multiple PMUs with common prefix. The PMU name of uncore event alias is the common prefix. For example, UNC_M_CLOCKTICKS is clock event for iMC, which include 6 PMUs with the same prefix "uncore_imc" on a skylake server. The real PMU names on the system for iMC are uncore_imc_0 ... uncore_imc_5. The strncmp is used to only check the common prefix for uncore event alias. With the patch: # perf stat -e unc_m_clockticks -a --no-merge sleep 1 Performance counter stats for 'system wide': 723,594,722 unc_m_clockticks [uncore_imc_5] 724,001,954 unc_m_clockticks [uncore_imc_3] 724,042,655 unc_m_clockticks [uncore_imc_1] 724,161,001 unc_m_clockticks [uncore_imc_4] 724,293,713 unc_m_clockticks [uncore_imc_2] 724,340,901 unc_m_clockticks [uncore_imc_0] 1.002090060 seconds time elapsed Signed-off-by: Kan Liang Acked-by: Jiri Olsa Cc: Andi Kleen Cc: Thomas Richter Cc: stable@vger.kernel.org Fixes: ea1fa48c055f ("perf stat: Handle different PMU names with common prefix") Link: http://lkml.kernel.org/r/1552672814-156173-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 6199a3174ab9..e0429f4ef335 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -732,10 +732,20 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) if (!is_arm_pmu_core(name)) { pname = pe->pmu ? pe->pmu : "cpu"; + + /* + * uncore alias may be from different PMU + * with common prefix + */ + if (pmu_is_uncore(name) && + !strncmp(pname, name, strlen(pname))) + goto new_alias; + if (strcmp(pname, name)) continue; } +new_alias: /* need type casts to override 'const' */ __perf_pmu__new_alias(head, NULL, (char *)pe->name, (char *)pe->desc, (char *)pe->event, -- cgit From 6f845ebec2706841d15831fab3ffffcfd9e676fa Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Tue, 26 Mar 2019 18:00:31 +0530 Subject: powerpc/pseries/mce: Fix misleading print for TLB mutlihit On pseries, TLB multihit are reported as D-Cache Multihit. This is because the wrongly populated mc_err_types[] array. Per PAPR, TLB error type is 0x04 and mc_err_types[4] points to "D-Cache" instead of "TLB" string. Fixup the mc_err_types[] array. Machine check error type per PAPR: 0x00 = Uncorrectable Memory Error (UE) 0x01 = SLB error 0x02 = ERAT Error 0x04 = TLB error 0x05 = D-Cache error 0x07 = I-Cache error Fixes: 8f0b80561f21 ("powerpc/pseries: Display machine check error details.") Cc: stable@vger.kernel.org # v4.20+ Reported-by: Aneesh Kumar K.V Signed-off-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/pseries/ras.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index d97d52772789..452dcfd7e5dd 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -550,6 +550,7 @@ static void pseries_print_mce_info(struct pt_regs *regs, "UE", "SLB", "ERAT", + "Unknown", "TLB", "D-Cache", "Unknown", -- cgit From f560bd19d2fe0e54851d706b72acbc6f2eed3567 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Thu, 28 Mar 2019 12:42:33 +0100 Subject: x86/realmode: Make set_real_mode_mem() static inline Remove the unused @size argument and move it into a header file, so it can be inlined. [ bp: Massage. ] Signed-off-by: Matteo Croce Signed-off-by: Borislav Petkov Reviewed-by: Mukesh Ojha Cc: Ard Biesheuvel Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: linux-efi Cc: platform-driver-x86@vger.kernel.org Cc: Thomas Gleixner Cc: x86-ml Link: https://lkml.kernel.org/r/20190328114233.27835-1-mcroce@redhat.com --- arch/x86/include/asm/realmode.h | 6 +++++- arch/x86/platform/efi/quirks.c | 2 +- arch/x86/realmode/init.c | 9 +-------- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/realmode.h b/arch/x86/include/asm/realmode.h index 63b3393bd98e..c53682303c9c 100644 --- a/arch/x86/include/asm/realmode.h +++ b/arch/x86/include/asm/realmode.h @@ -77,7 +77,11 @@ static inline size_t real_mode_size_needed(void) return ALIGN(real_mode_blob_end - real_mode_blob, PAGE_SIZE); } -void set_real_mode_mem(phys_addr_t mem, size_t size); +static inline void set_real_mode_mem(phys_addr_t mem) +{ + real_mode_header = (struct real_mode_header *) __va(mem); +} + void reserve_real_mode(void); #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c index 458a0e2bcc57..a25a9fd987a9 100644 --- a/arch/x86/platform/efi/quirks.c +++ b/arch/x86/platform/efi/quirks.c @@ -449,7 +449,7 @@ void __init efi_free_boot_services(void) */ rm_size = real_mode_size_needed(); if (rm_size && (start + rm_size) < (1<<20) && size >= rm_size) { - set_real_mode_mem(start, rm_size); + set_real_mode_mem(start); start += rm_size; size -= rm_size; } diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index 47d097946872..7dce39c8c034 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c @@ -15,13 +15,6 @@ u32 *trampoline_cr4_features; /* Hold the pgd entry used on booting additional CPUs */ pgd_t trampoline_pgd_entry; -void __init set_real_mode_mem(phys_addr_t mem, size_t size) -{ - void *base = __va(mem); - - real_mode_header = (struct real_mode_header *) base; -} - void __init reserve_real_mode(void) { phys_addr_t mem; @@ -40,7 +33,7 @@ void __init reserve_real_mode(void) } memblock_reserve(mem, size); - set_real_mode_mem(mem, size); + set_real_mode_mem(mem); } static void __init setup_real_mode(void) -- cgit From 9c38f1f044080392603c497ecca4d7d09876ff99 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Mon, 25 Mar 2019 15:16:47 +0000 Subject: kconfig/[mn]conf: handle backspace (^H) key Backspace is not working on some terminal emulators which do not send the key code defined by terminfo. Terminals either send '^H' (8) or '^?' (127). But currently only '^?' is handled. Let's also handle '^H' for those terminals. Signed-off-by: Changbin Du Signed-off-by: Masahiro Yamada --- scripts/kconfig/lxdialog/inputbox.c | 3 ++- scripts/kconfig/nconf.c | 2 +- scripts/kconfig/nconf.gui.c | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/scripts/kconfig/lxdialog/inputbox.c b/scripts/kconfig/lxdialog/inputbox.c index 611945611bf8..1dcfb288ee63 100644 --- a/scripts/kconfig/lxdialog/inputbox.c +++ b/scripts/kconfig/lxdialog/inputbox.c @@ -113,7 +113,8 @@ do_resize: case KEY_DOWN: break; case KEY_BACKSPACE: - case 127: + case 8: /* ^H */ + case 127: /* ^? */ if (pos) { wattrset(dialog, dlg.inputbox.atr); if (input_x == 0) { diff --git a/scripts/kconfig/nconf.c b/scripts/kconfig/nconf.c index a4670f4e825a..ac92c0ded6c5 100644 --- a/scripts/kconfig/nconf.c +++ b/scripts/kconfig/nconf.c @@ -1048,7 +1048,7 @@ static int do_match(int key, struct match_state *state, int *ans) state->match_direction = FIND_NEXT_MATCH_UP; *ans = get_mext_match(state->pattern, state->match_direction); - } else if (key == KEY_BACKSPACE || key == 127) { + } else if (key == KEY_BACKSPACE || key == 8 || key == 127) { state->pattern[strlen(state->pattern)-1] = '\0'; adj_match_dir(&state->match_direction); } else diff --git a/scripts/kconfig/nconf.gui.c b/scripts/kconfig/nconf.gui.c index 7be620a1fcdb..77f525a8617c 100644 --- a/scripts/kconfig/nconf.gui.c +++ b/scripts/kconfig/nconf.gui.c @@ -439,7 +439,8 @@ int dialog_inputbox(WINDOW *main_window, case KEY_F(F_EXIT): case KEY_F(F_BACK): break; - case 127: + case 8: /* ^H */ + case 127: /* ^? */ case KEY_BACKSPACE: if (cursor_position > 0) { memmove(&result[cursor_position-1], -- cgit From 8aafaaf2212192012f5bae305bb31cdf7681d777 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 28 Mar 2019 11:44:59 +0100 Subject: iommu/amd: Reserve exclusion range in iova-domain If a device has an exclusion range specified in the IVRS table, this region needs to be reserved in the iova-domain of that device. This hasn't happened until now and can cause data corruption on data transfered with these devices. Treat exclusion ranges as reserved regions in the iommu-core to fix the problem. Fixes: be2a022c0dd0 ('x86, AMD IOMMU: add functions to parse IOMMU memory mapping requirements for devices') Signed-off-by: Joerg Roedel Reviewed-by: Gary R Hook --- drivers/iommu/amd_iommu.c | 9 ++++++--- drivers/iommu/amd_iommu_init.c | 7 ++++--- drivers/iommu/amd_iommu_types.h | 2 ++ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 21cb088d6687..f7cdd2ab7f11 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3169,21 +3169,24 @@ static void amd_iommu_get_resv_regions(struct device *dev, return; list_for_each_entry(entry, &amd_iommu_unity_map, list) { + int type, prot = 0; size_t length; - int prot = 0; if (devid < entry->devid_start || devid > entry->devid_end) continue; + type = IOMMU_RESV_DIRECT; length = entry->address_end - entry->address_start; if (entry->prot & IOMMU_PROT_IR) prot |= IOMMU_READ; if (entry->prot & IOMMU_PROT_IW) prot |= IOMMU_WRITE; + if (entry->prot & IOMMU_UNITY_MAP_FLAG_EXCL_RANGE) + /* Exclusion range */ + type = IOMMU_RESV_RESERVED; region = iommu_alloc_resv_region(entry->address_start, - length, prot, - IOMMU_RESV_DIRECT); + length, prot, type); if (!region) { dev_err(dev, "Out of memory allocating dm-regions\n"); return; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index f773792d77fd..1b1378619fc9 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -2013,6 +2013,9 @@ static int __init init_unity_map_range(struct ivmd_header *m) if (e == NULL) return -ENOMEM; + if (m->flags & IVMD_FLAG_EXCL_RANGE) + init_exclusion_range(m); + switch (m->type) { default: kfree(e); @@ -2059,9 +2062,7 @@ static int __init init_memory_definitions(struct acpi_table_header *table) while (p < end) { m = (struct ivmd_header *)p; - if (m->flags & IVMD_FLAG_EXCL_RANGE) - init_exclusion_range(m); - else if (m->flags & IVMD_FLAG_UNITY_MAP) + if (m->flags & (IVMD_FLAG_UNITY_MAP | IVMD_FLAG_EXCL_RANGE)) init_unity_map_range(m); p += m->length; diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index eae0741f72dc..87965e4d9647 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -374,6 +374,8 @@ #define IOMMU_PROT_IR 0x01 #define IOMMU_PROT_IW 0x02 +#define IOMMU_UNITY_MAP_FLAG_EXCL_RANGE (1 << 2) + /* IOMMU capabilities */ #define IOMMU_CAP_IOTLB 24 #define IOMMU_CAP_NPCACHE 26 -- cgit From 33bac912840fe64dbc15556302537dc6a17cac63 Mon Sep 17 00:00:00 2001 From: Gao Xiang Date: Fri, 29 Mar 2019 04:14:58 +0800 Subject: staging: erofs: keep corrupted fs from crashing kernel in erofs_readdir() After commit 419d6efc50e9, kernel cannot be crashed in the namei path. However, corrupted nameoff can do harm in the process of readdir for scenerios without dm-verity as well. Fix it now. Fixes: 3aa8ec716e52 ("staging: erofs: add directory operations") Cc: # 4.19+ Signed-off-by: Gao Xiang Reviewed-by: Chao Yu Signed-off-by: Greg Kroah-Hartman --- drivers/staging/erofs/dir.c | 45 +++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-) diff --git a/drivers/staging/erofs/dir.c b/drivers/staging/erofs/dir.c index 829f7b12e0dc..9bbc68729c11 100644 --- a/drivers/staging/erofs/dir.c +++ b/drivers/staging/erofs/dir.c @@ -23,6 +23,21 @@ static const unsigned char erofs_filetype_table[EROFS_FT_MAX] = { [EROFS_FT_SYMLINK] = DT_LNK, }; +static void debug_one_dentry(unsigned char d_type, const char *de_name, + unsigned int de_namelen) +{ +#ifdef CONFIG_EROFS_FS_DEBUG + /* since the on-disk name could not have the trailing '\0' */ + unsigned char dbg_namebuf[EROFS_NAME_LEN + 1]; + + memcpy(dbg_namebuf, de_name, de_namelen); + dbg_namebuf[de_namelen] = '\0'; + + debugln("found dirent %s de_len %u d_type %d", dbg_namebuf, + de_namelen, d_type); +#endif +} + static int erofs_fill_dentries(struct dir_context *ctx, void *dentry_blk, unsigned int *ofs, unsigned int nameoff, unsigned int maxsize) @@ -33,14 +48,10 @@ static int erofs_fill_dentries(struct dir_context *ctx, de = dentry_blk + *ofs; while (de < end) { const char *de_name; - int de_namelen; + unsigned int de_namelen; unsigned char d_type; -#ifdef CONFIG_EROFS_FS_DEBUG - unsigned int dbg_namelen; - unsigned char dbg_namebuf[EROFS_NAME_LEN]; -#endif - if (unlikely(de->file_type < EROFS_FT_MAX)) + if (de->file_type < EROFS_FT_MAX) d_type = erofs_filetype_table[de->file_type]; else d_type = DT_UNKNOWN; @@ -48,26 +59,20 @@ static int erofs_fill_dentries(struct dir_context *ctx, nameoff = le16_to_cpu(de->nameoff); de_name = (char *)dentry_blk + nameoff; - de_namelen = unlikely(de + 1 >= end) ? - /* last directory entry */ - strnlen(de_name, maxsize - nameoff) : - le16_to_cpu(de[1].nameoff) - nameoff; + /* the last dirent in the block? */ + if (de + 1 >= end) + de_namelen = strnlen(de_name, maxsize - nameoff); + else + de_namelen = le16_to_cpu(de[1].nameoff) - nameoff; /* a corrupted entry is found */ - if (unlikely(de_namelen < 0)) { + if (unlikely(nameoff + de_namelen > maxsize || + de_namelen > EROFS_NAME_LEN)) { DBG_BUGON(1); return -EIO; } -#ifdef CONFIG_EROFS_FS_DEBUG - dbg_namelen = min(EROFS_NAME_LEN - 1, de_namelen); - memcpy(dbg_namebuf, de_name, dbg_namelen); - dbg_namebuf[dbg_namelen] = '\0'; - - debugln("%s, found de_name %s de_len %d d_type %d", __func__, - dbg_namebuf, de_namelen, d_type); -#endif - + debug_one_dentry(d_type, de_name, de_namelen); if (!dir_emit(ctx, de_name, de_namelen, le64_to_cpu(de->nid), d_type)) /* stopped by some reason */ -- cgit From cc26358f89c3e493b54766b1ca56cfc6b14db78a Mon Sep 17 00:00:00 2001 From: Malcolm Priestley Date: Wed, 27 Mar 2019 18:45:26 +0000 Subject: staging: vt6655: Remove vif check from vnt_interrupt A check for vif is made in vnt_interrupt_work. There is a small chance of leaving interrupt disabled while vif is NULL and the work hasn't been scheduled. Signed-off-by: Malcolm Priestley CC: stable@vger.kernel.org # v4.2+ Signed-off-by: Greg Kroah-Hartman --- drivers/staging/vt6655/device_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c index 83f1a1cf9182..c6bb4aaf9bd0 100644 --- a/drivers/staging/vt6655/device_main.c +++ b/drivers/staging/vt6655/device_main.c @@ -1137,8 +1137,7 @@ static irqreturn_t vnt_interrupt(int irq, void *arg) { struct vnt_private *priv = arg; - if (priv->vif) - schedule_work(&priv->interrupt_work); + schedule_work(&priv->interrupt_work); MACvIntDisable(priv->PortOffset); -- cgit From c412a769d2452161e97f163c4c4f31efc6626f06 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 28 Mar 2019 20:43:15 -0700 Subject: kasan: fix variable 'tag' set but not used warning set_tag() compiles away when CONFIG_KASAN_SW_TAGS=n, so make arch_kasan_set_tag() a static inline function to fix warnings below. mm/kasan/common.c: In function '__kasan_kmalloc': mm/kasan/common.c:475:5: warning: variable 'tag' set but not used [-Wunused-but-set-variable] u8 tag; ^~~ Link: http://lkml.kernel.org/r/20190307185244.54648-1-cai@lca.pw Signed-off-by: Qian Cai Reviewed-by: Andrey Konovalov Cc: Andrey Ryabinin Cc: Alexander Potapenko Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/kasan.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 3e0c11f7d7a1..3ce956efa0cb 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -163,7 +163,10 @@ static inline u8 random_tag(void) #endif #ifndef arch_kasan_set_tag -#define arch_kasan_set_tag(addr, tag) ((void *)(addr)) +static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) +{ + return addr; +} #endif #ifndef arch_kasan_reset_tag #define arch_kasan_reset_tag(addr) ((void *)(addr)) -- cgit From cae85cb8add35f678cf487139d05e083ce2f570a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 28 Mar 2019 20:43:19 -0700 Subject: mm/memory.c: fix modifying of page protection by insert_pfn() Aneesh has reported that PPC triggers the following warning when excercising DAX code: IP set_pte_at+0x3c/0x190 LR insert_pfn+0x208/0x280 Call Trace: insert_pfn+0x68/0x280 dax_iomap_pte_fault.isra.7+0x734/0xa40 __xfs_filemap_fault+0x280/0x2d0 do_wp_page+0x48c/0xa40 __handle_mm_fault+0x8d0/0x1fd0 handle_mm_fault+0x140/0x250 __do_page_fault+0x300/0xd60 handle_page_fault+0x18 Now that is WARN_ON in set_pte_at which is VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep)); The problem is that on some architectures set_pte_at() cannot cope with a situation where there is already some (different) valid entry present. Use ptep_set_access_flags() instead to modify the pfn which is built to deal with modifying existing PTE. Link: http://lkml.kernel.org/r/20190311084537.16029-1-jack@suse.cz Fixes: b2770da64254 "mm: add vm_insert_mixed_mkwrite()" Signed-off-by: Jan Kara Reported-by: "Aneesh Kumar K.V" Reviewed-by: Aneesh Kumar K.V Acked-by: Dan Williams Cc: Chandan Rajendra Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 47fe250307c7..ab650c21bccd 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1549,10 +1549,12 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr, WARN_ON_ONCE(!is_zero_pfn(pte_pfn(*pte))); goto out_unlock; } - entry = *pte; - goto out_mkwrite; - } else - goto out_unlock; + entry = pte_mkyoung(*pte); + entry = maybe_mkwrite(pte_mkdirty(entry), vma); + if (ptep_set_access_flags(vma, addr, pte, entry, 1)) + update_mmu_cache(vma, addr, pte); + } + goto out_unlock; } /* Ok, finally just insert the thing.. */ @@ -1561,7 +1563,6 @@ static vm_fault_t insert_pfn(struct vm_area_struct *vma, unsigned long addr, else entry = pte_mkspecial(pfn_t_pte(pfn, prot)); -out_mkwrite: if (mkwrite) { entry = pte_mkyoung(entry); entry = maybe_mkwrite(pte_mkdirty(entry), vma); -- cgit From 44dc1b1fab787d265b9b3064bd564c87b6b86397 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 28 Mar 2019 20:43:23 -0700 Subject: mm/debug.c: add a cast to u64 for atomic64_read() atomic64_read() on ppc64le returns "long int", so fix the same way as commit d549f545e690 ("drm/virtio: use %llu format string form atomic64_t") by adding a cast to u64, which makes it work on all arches. In file included from ./include/linux/printk.h:7, from ./include/linux/kernel.h:15, from mm/debug.c:9: mm/debug.c: In function 'dump_mm': ./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 19 has type 'long int' [-Wformat=] #define KERN_SOH "A" /* ASCII Start Of Header */ ^~~~~~ ./include/linux/kern_levels.h:8:20: note: in expansion of macro 'KERN_SOH' #define KERN_EMERG KERN_SOH "0" /* system is unusable */ ^~~~~~~~ ./include/linux/printk.h:297:9: note: in expansion of macro 'KERN_EMERG' printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) ^~~~~~~~~~ mm/debug.c:133:2: note: in expansion of macro 'pr_emerg' pr_emerg("mm %px mmap %px seqnum %llu task_size %lu" ^~~~~~~~ mm/debug.c:140:17: note: format string is defined here "pinned_vm %llx data_vm %lx exec_vm %lx stack_vm %lx" ~~~^ %lx Link: http://lkml.kernel.org/r/20190310183051.87303-1-cai@lca.pw Fixes: 70f8a3ca68d3 ("mm: make mm->pinned_vm an atomic64 counter") Signed-off-by: Qian Cai Acked-by: Davidlohr Bueso Cc: Jason Gunthorpe Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/debug.c b/mm/debug.c index c0b31b6c3877..45d9eb77b84e 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -168,7 +168,7 @@ void dump_mm(const struct mm_struct *mm) mm_pgtables_bytes(mm), mm->map_count, mm->hiwater_rss, mm->hiwater_vm, mm->total_vm, mm->locked_vm, - atomic64_read(&mm->pinned_vm), + (u64)atomic64_read(&mm->pinned_vm), mm->data_vm, mm->exec_vm, mm->stack_vm, mm->start_code, mm->end_code, mm->start_data, mm->end_data, mm->start_brk, mm->brk, mm->start_stack, -- cgit From c1e287c11b752b055257196c5e98e4e91f401b32 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Thu, 28 Mar 2019 20:43:27 -0700 Subject: mailmap: add Changbin Du Add my email in the mailmap file to have a consistent shortlog output. Link: http://lkml.kernel.org/r/20190308142103.4929-1-changbin.du@gmail.com Signed-off-by: Changbin Du Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index 37e1847c7988..b2cde8668dcc 100644 --- a/.mailmap +++ b/.mailmap @@ -224,3 +224,5 @@ Yakir Yang Yusuke Goda Gustavo Padovan Gustavo Padovan +Changbin Du +Changbin Du -- cgit From 73601ea5b7b18eb234219ae2adf77530f389da79 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 28 Mar 2019 20:43:30 -0700 Subject: fs/open.c: allow opening only regular files during execve() syzbot is hitting lockdep warning [1] due to trying to open a fifo during an execve() operation. But we don't need to open non regular files during an execve() operation, for all files which we will need are the executable file itself and the interpreter programs like /bin/sh and ld-linux.so.2 . Since the manpage for execve(2) says that execve() returns EACCES when the file or a script interpreter is not a regular file, and the manpage for uselib(2) says that uselib() can return EACCES, and we use FMODE_EXEC when opening for execve()/uselib(), we can bail out if a non regular file is requested with FMODE_EXEC set. Since this deadlock followed by khungtaskd warnings is trivially reproducible by a local unprivileged user, and syzbot's frequent crash due to this deadlock defers finding other bugs, let's workaround this deadlock until we get a chance to find a better solution. [1] https://syzkaller.appspot.com/bug?id=b5095bfec44ec84213bac54742a82483aad578ce Link: http://lkml.kernel.org/r/1552044017-7890-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp Reported-by: syzbot Fixes: 8924feff66f35fe2 ("splice: lift pipe_lock out of splice_to_pipe()") Signed-off-by: Tetsuo Handa Acked-by: Kees Cook Cc: Al Viro Cc: Eric Biggers Cc: Dmitry Vyukov Cc: [4.9+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/open.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/open.c b/fs/open.c index 0285ce7dbd51..f1c2f855fd43 100644 --- a/fs/open.c +++ b/fs/open.c @@ -733,6 +733,12 @@ static int do_dentry_open(struct file *f, return 0; } + /* Any file opened for execve()/uselib() has to be a regular file. */ + if (unlikely(f->f_flags & FMODE_EXEC && !S_ISREG(inode->i_mode))) { + error = -EACCES; + goto cleanup_file; + } + if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) { error = get_write_access(inode); if (unlikely(error)) -- cgit From 9b7ea46a82b31c74a37e6ff1c2a1df7d53e392ab Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 28 Mar 2019 20:43:34 -0700 Subject: mm/hotplug: fix offline undo_isolate_page_range() Commit f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online") introduced move_pfn_range_to_zone() which calls memmap_init_zone() during onlining a memory block. memmap_init_zone() will reset pagetype flags and makes migrate type to be MOVABLE. However, in __offline_pages(), it also call undo_isolate_page_range() after offline_isolated_pages() to do the same thing. Due to commit 2ce13640b3f4 ("mm: __first_valid_page skip over offline pages") changed __first_valid_page() to skip offline pages, undo_isolate_page_range() here just waste CPU cycles looping around the offlining PFN range while doing nothing, because __first_valid_page() will return NULL as offline_isolated_pages() has already marked all memory sections within the pfn range as offline via offline_mem_sections(). Also, after calling the "useless" undo_isolate_page_range() here, it reaches the point of no returning by notifying MEM_OFFLINE. Those pages will be marked as MIGRATE_MOVABLE again once onlining. The only thing left to do is to decrease the number of isolated pageblocks zone counter which would make some paths of the page allocation slower that the above commit introduced. Even if alloc_contig_range() can be used to isolate 16GB-hugetlb pages on ppc64, an "int" should still be enough to represent the number of pageblocks there. Fix an incorrect comment along the way. [cai@lca.pw: v4] Link: http://lkml.kernel.org/r/20190314150641.59358-1-cai@lca.pw Link: http://lkml.kernel.org/r/20190313143133.46200-1-cai@lca.pw Fixes: 2ce13640b3f4 ("mm: __first_valid_page skip over offline pages") Signed-off-by: Qian Cai Acked-by: Michal Hocko Reviewed-by: Oscar Salvador Cc: Vlastimil Babka Cc: [4.13+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-isolation.h | 10 --------- mm/memory_hotplug.c | 17 +++++++++++---- mm/page_alloc.c | 2 +- mm/page_isolation.c | 48 ++++++++++++++++++++++++++---------------- mm/sparse.c | 2 +- 5 files changed, 45 insertions(+), 34 deletions(-) diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 4eb26d278046..280ae96dc4c3 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -41,16 +41,6 @@ int move_freepages_block(struct zone *zone, struct page *page, /* * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE. - * If specified range includes migrate types other than MOVABLE or CMA, - * this will fail with -EBUSY. - * - * For isolating all pages in the range finally, the caller have to - * free all pages in the range. test_page_isolated() can be used for - * test it. - * - * The following flags are allowed (they can be combined in a bit mask) - * SKIP_HWPOISON - ignore hwpoison pages - * REPORT_FAILURE - report details about the failure to isolate the range */ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index f767582af4f8..0e0a16021fd5 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1576,7 +1576,7 @@ static int __ref __offline_pages(unsigned long start_pfn, { unsigned long pfn, nr_pages; long offlined_pages; - int ret, node; + int ret, node, nr_isolate_pageblock; unsigned long flags; unsigned long valid_start, valid_end; struct zone *zone; @@ -1602,10 +1602,11 @@ static int __ref __offline_pages(unsigned long start_pfn, ret = start_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE, SKIP_HWPOISON | REPORT_FAILURE); - if (ret) { + if (ret < 0) { reason = "failure to isolate range"; goto failed_removal; } + nr_isolate_pageblock = ret; arg.start_pfn = start_pfn; arg.nr_pages = nr_pages; @@ -1657,8 +1658,16 @@ static int __ref __offline_pages(unsigned long start_pfn, /* Ok, all of our target is isolated. We cannot do rollback at this point. */ offline_isolated_pages(start_pfn, end_pfn); - /* reset pagetype flags and makes migrate type to be MOVABLE */ - undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); + + /* + * Onlining will reset pagetype flags and makes migrate type + * MOVABLE, so just need to decrease the number of isolated + * pageblocks zone counter here. + */ + spin_lock_irqsave(&zone->lock, flags); + zone->nr_isolate_pageblock -= nr_isolate_pageblock; + spin_unlock_irqrestore(&zone->lock, flags); + /* removal success */ adjust_managed_page_count(pfn_to_page(start_pfn), -offlined_pages); zone->present_pages -= offlined_pages; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 03fcf73d47da..d96ca5bc555b 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -8233,7 +8233,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, ret = start_isolate_page_range(pfn_max_align_down(start), pfn_max_align_up(end), migratetype, 0); - if (ret) + if (ret < 0) return ret; /* diff --git a/mm/page_isolation.c b/mm/page_isolation.c index ce323e56b34d..bf4159d771c7 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -160,27 +160,36 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) return NULL; } -/* - * start_isolate_page_range() -- make page-allocation-type of range of pages - * to be MIGRATE_ISOLATE. - * @start_pfn: The lower PFN of the range to be isolated. - * @end_pfn: The upper PFN of the range to be isolated. - * @migratetype: migrate type to set in error recovery. +/** + * start_isolate_page_range() - make page-allocation-type of range of pages to + * be MIGRATE_ISOLATE. + * @start_pfn: The lower PFN of the range to be isolated. + * @end_pfn: The upper PFN of the range to be isolated. + * start_pfn/end_pfn must be aligned to pageblock_order. + * @migratetype: Migrate type to set in error recovery. + * @flags: The following flags are allowed (they can be combined in + * a bit mask) + * SKIP_HWPOISON - ignore hwpoison pages + * REPORT_FAILURE - report details about the failure to + * isolate the range * * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in * the range will never be allocated. Any free pages and pages freed in the - * future will not be allocated again. - * - * start_pfn/end_pfn must be aligned to pageblock_order. - * Return 0 on success and -EBUSY if any part of range cannot be isolated. + * future will not be allocated again. If specified range includes migrate types + * other than MOVABLE or CMA, this will fail with -EBUSY. For isolating all + * pages in the range finally, the caller have to free all pages in the range. + * test_page_isolated() can be used for test it. * * There is no high level synchronization mechanism that prevents two threads - * from trying to isolate overlapping ranges. If this happens, one thread + * from trying to isolate overlapping ranges. If this happens, one thread * will notice pageblocks in the overlapping range already set to isolate. * This happens in set_migratetype_isolate, and set_migratetype_isolate - * returns an error. We then clean up by restoring the migration type on - * pageblocks we may have modified and return -EBUSY to caller. This + * returns an error. We then clean up by restoring the migration type on + * pageblocks we may have modified and return -EBUSY to caller. This * prevents two threads from simultaneously working on overlapping ranges. + * + * Return: the number of isolated pageblocks on success and -EBUSY if any part + * of range cannot be isolated. */ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, unsigned migratetype, int flags) @@ -188,6 +197,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, unsigned long pfn; unsigned long undo_pfn; struct page *page; + int nr_isolate_pageblock = 0; BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages)); BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages)); @@ -196,13 +206,15 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn, pfn < end_pfn; pfn += pageblock_nr_pages) { page = __first_valid_page(pfn, pageblock_nr_pages); - if (page && - set_migratetype_isolate(page, migratetype, flags)) { - undo_pfn = pfn; - goto undo; + if (page) { + if (set_migratetype_isolate(page, migratetype, flags)) { + undo_pfn = pfn; + goto undo; + } + nr_isolate_pageblock++; } } - return 0; + return nr_isolate_pageblock; undo: for (pfn = start_pfn; pfn < undo_pfn; diff --git a/mm/sparse.c b/mm/sparse.c index 69904aa6165b..56e057c432f9 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -567,7 +567,7 @@ void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn) } #ifdef CONFIG_MEMORY_HOTREMOVE -/* Mark all memory sections within the pfn range as online */ +/* Mark all memory sections within the pfn range as offline */ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn) { unsigned long pfn; -- cgit From e6a9467ea14bae8691b0f72c500510c42ea8edb8 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 28 Mar 2019 20:43:38 -0700 Subject: ocfs2: fix inode bh swapping mixup in ocfs2_reflink_inodes_lock ocfs2_reflink_inodes_lock() can swap the inode1/inode2 variables so that we always grab cluster locks in order of increasing inode number. Unfortunately, we forget to swap the inode record buffer head pointers when we've done this, which leads to incorrect bookkeepping when we're trying to make the two inodes have the same refcount tree. This has the effect of causing filesystem shutdowns if you're trying to reflink data from inode 100 into inode 97, where inode 100 already has a refcount tree attached and inode 97 doesn't. The reflink code decides to copy the refcount tree pointer from 100 to 97, but uses inode 97's inode record to open the tree root (which it doesn't have) and blows up. This issue causes filesystem shutdowns and metadata corruption! Link: http://lkml.kernel.org/r/20190312214910.GK20533@magnolia Fixes: 29ac8e856cb369 ("ocfs2: implement the VFS clone_range, copy_range, and dedupe_range features") Signed-off-by: Darrick J. Wong Reviewed-by: Joseph Qi Cc: Mark Fasheh Cc: Joel Becker Cc: Junxiao Bi Cc: Joseph Qi Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/refcounttree.c | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index a35259eebc56..1dc9a08e8bdc 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4719,22 +4719,23 @@ out: /* Lock an inode and grab a bh pointing to the inode. */ int ocfs2_reflink_inodes_lock(struct inode *s_inode, - struct buffer_head **bh1, + struct buffer_head **bh_s, struct inode *t_inode, - struct buffer_head **bh2) + struct buffer_head **bh_t) { - struct inode *inode1; - struct inode *inode2; + struct inode *inode1 = s_inode; + struct inode *inode2 = t_inode; struct ocfs2_inode_info *oi1; struct ocfs2_inode_info *oi2; + struct buffer_head *bh1 = NULL; + struct buffer_head *bh2 = NULL; bool same_inode = (s_inode == t_inode); + bool need_swap = (inode1->i_ino > inode2->i_ino); int status; /* First grab the VFS and rw locks. */ lock_two_nondirectories(s_inode, t_inode); - inode1 = s_inode; - inode2 = t_inode; - if (inode1->i_ino > inode2->i_ino) + if (need_swap) swap(inode1, inode2); status = ocfs2_rw_lock(inode1, 1); @@ -4757,17 +4758,13 @@ int ocfs2_reflink_inodes_lock(struct inode *s_inode, trace_ocfs2_double_lock((unsigned long long)oi1->ip_blkno, (unsigned long long)oi2->ip_blkno); - if (*bh1) - *bh1 = NULL; - if (*bh2) - *bh2 = NULL; - /* We always want to lock the one with the lower lockid first. */ if (oi1->ip_blkno > oi2->ip_blkno) mlog_errno(-ENOLCK); /* lock id1 */ - status = ocfs2_inode_lock_nested(inode1, bh1, 1, OI_LS_REFLINK_TARGET); + status = ocfs2_inode_lock_nested(inode1, &bh1, 1, + OI_LS_REFLINK_TARGET); if (status < 0) { if (status != -ENOENT) mlog_errno(status); @@ -4776,15 +4773,25 @@ int ocfs2_reflink_inodes_lock(struct inode *s_inode, /* lock id2 */ if (!same_inode) { - status = ocfs2_inode_lock_nested(inode2, bh2, 1, + status = ocfs2_inode_lock_nested(inode2, &bh2, 1, OI_LS_REFLINK_TARGET); if (status < 0) { if (status != -ENOENT) mlog_errno(status); goto out_cl1; } - } else - *bh2 = *bh1; + } else { + bh2 = bh1; + } + + /* + * If we swapped inode order above, we have to swap the buffer heads + * before passing them back to the caller. + */ + if (need_swap) + swap(bh1, bh2); + *bh_s = bh1; + *bh_t = bh2; trace_ocfs2_double_lock_end( (unsigned long long)oi1->ip_blkno, @@ -4794,8 +4801,7 @@ int ocfs2_reflink_inodes_lock(struct inode *s_inode, out_cl1: ocfs2_inode_unlock(inode1, 1); - brelse(*bh1); - *bh1 = NULL; + brelse(bh1); out_rw2: ocfs2_rw_unlock(inode2, 1); out_i2: -- cgit From 6d6ea1e967a246f12cfe2f5fb743b70b2e608d4a Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Thu, 28 Mar 2019 20:43:42 -0700 Subject: mm: add support for kmem caches in DMA32 zone Patch series "iommu/io-pgtable-arm-v7s: Use DMA32 zone for page tables", v6. This is a followup to the discussion in [1], [2]. IOMMUs using ARMv7 short-descriptor format require page tables (level 1 and 2) to be allocated within the first 4GB of RAM, even on 64-bit systems. For L1 tables that are bigger than a page, we can just use __get_free_pages with GFP_DMA32 (on arm64 systems only, arm would still use GFP_DMA). For L2 tables that only take 1KB, it would be a waste to allocate a full page, so we considered 3 approaches: 1. This series, adding support for GFP_DMA32 slab caches. 2. genalloc, which requires pre-allocating the maximum number of L2 page tables (4096, so 4MB of memory). 3. page_frag, which is not very memory-efficient as it is unable to reuse freed fragments until the whole page is freed. [3] This series is the most memory-efficient approach. stable@ note: We confirmed that this is a regression, and IOMMU errors happen on 4.19 and linux-next/master on MT8173 (elm, Acer Chromebook R13). The issue most likely starts from commit ad67f5a6545f ("arm64: replace ZONE_DMA with ZONE_DMA32"), i.e. 4.15, and presumably breaks a number of Mediatek platforms (and maybe others?). [1] https://lists.linuxfoundation.org/pipermail/iommu/2018-November/030876.html [2] https://lists.linuxfoundation.org/pipermail/iommu/2018-December/031696.html [3] https://patchwork.codeaurora.org/patch/671639/ This patch (of 3): IOMMUs using ARMv7 short-descriptor format require page tables to be allocated within the first 4GB of RAM, even on 64-bit systems. On arm64, this is done by passing GFP_DMA32 flag to memory allocation functions. For IOMMU L2 tables that only take 1KB, it would be a waste to allocate a full page using get_free_pages, so we considered 3 approaches: 1. This patch, adding support for GFP_DMA32 slab caches. 2. genalloc, which requires pre-allocating the maximum number of L2 page tables (4096, so 4MB of memory). 3. page_frag, which is not very memory-efficient as it is unable to reuse freed fragments until the whole page is freed. This change makes it possible to create a custom cache in DMA32 zone using kmem_cache_create, then allocate memory using kmem_cache_alloc. We do not create a DMA32 kmalloc cache array, as there are currently no users of kmalloc(..., GFP_DMA32). These calls will continue to trigger a warning, as we keep GFP_DMA32 in GFP_SLAB_BUG_MASK. This implies that calls to kmem_cache_*alloc on a SLAB_CACHE_DMA32 kmem_cache must _not_ use GFP_DMA32 (it is anyway redundant and unnecessary). Link: http://lkml.kernel.org/r/20181210011504.122604-2-drinkcat@chromium.org Signed-off-by: Nicolas Boichat Acked-by: Vlastimil Babka Acked-by: Will Deacon Cc: Robin Murphy Cc: Joerg Roedel Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Michal Hocko Cc: Mel Gorman Cc: Sasha Levin Cc: Huaisheng Ye Cc: Mike Rapoport Cc: Yong Wu Cc: Matthias Brugger Cc: Tomasz Figa Cc: Yingjoe Chen Cc: Christoph Hellwig Cc: Matthew Wilcox Cc: Hsin-Yi Wang Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 ++ mm/slab.c | 2 ++ mm/slab.h | 3 ++- mm/slab_common.c | 2 +- mm/slub.c | 5 +++++ 5 files changed, 12 insertions(+), 2 deletions(-) diff --git a/include/linux/slab.h b/include/linux/slab.h index 11b45f7ae405..9449b19c5f10 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -32,6 +32,8 @@ #define SLAB_HWCACHE_ALIGN ((slab_flags_t __force)0x00002000U) /* Use GFP_DMA memory */ #define SLAB_CACHE_DMA ((slab_flags_t __force)0x00004000U) +/* Use GFP_DMA32 memory */ +#define SLAB_CACHE_DMA32 ((slab_flags_t __force)0x00008000U) /* DEBUG: Store the last owner for bug hunting */ #define SLAB_STORE_USER ((slab_flags_t __force)0x00010000U) /* Panic if kmem_cache_create() fails */ diff --git a/mm/slab.c b/mm/slab.c index 28652e4218e0..329bfe67f2ca 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -2115,6 +2115,8 @@ done: cachep->allocflags = __GFP_COMP; if (flags & SLAB_CACHE_DMA) cachep->allocflags |= GFP_DMA; + if (flags & SLAB_CACHE_DMA32) + cachep->allocflags |= GFP_DMA32; if (flags & SLAB_RECLAIM_ACCOUNT) cachep->allocflags |= __GFP_RECLAIMABLE; cachep->size = size; diff --git a/mm/slab.h b/mm/slab.h index e5e6658eeacc..43ac818b8592 100644 --- a/mm/slab.h +++ b/mm/slab.h @@ -127,7 +127,8 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size, /* Legal flag mask for kmem_cache_create(), for various configurations */ -#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \ +#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \ + SLAB_CACHE_DMA32 | SLAB_PANIC | \ SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS ) #if defined(CONFIG_DEBUG_SLAB) diff --git a/mm/slab_common.c b/mm/slab_common.c index 03eeb8b7b4b1..58251ba63e4a 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -53,7 +53,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work, SLAB_FAILSLAB | SLAB_KASAN) #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \ - SLAB_ACCOUNT) + SLAB_CACHE_DMA32 | SLAB_ACCOUNT) /* * Merge control. If this is set then no merging of slab caches will occur. diff --git a/mm/slub.c b/mm/slub.c index 1b08fbcb7e61..d30ede89f4a6 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -3589,6 +3589,9 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) if (s->flags & SLAB_CACHE_DMA) s->allocflags |= GFP_DMA; + if (s->flags & SLAB_CACHE_DMA32) + s->allocflags |= GFP_DMA32; + if (s->flags & SLAB_RECLAIM_ACCOUNT) s->allocflags |= __GFP_RECLAIMABLE; @@ -5679,6 +5682,8 @@ static char *create_unique_id(struct kmem_cache *s) */ if (s->flags & SLAB_CACHE_DMA) *p++ = 'd'; + if (s->flags & SLAB_CACHE_DMA32) + *p++ = 'D'; if (s->flags & SLAB_RECLAIM_ACCOUNT) *p++ = 'a'; if (s->flags & SLAB_CONSISTENCY_CHECKS) -- cgit From 0a352554da69b02f75ca3389c885c741f1f63235 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Thu, 28 Mar 2019 20:43:46 -0700 Subject: iommu/io-pgtable-arm-v7s: request DMA32 memory, and improve debugging IOMMUs using ARMv7 short-descriptor format require page tables (level 1 and 2) to be allocated within the first 4GB of RAM, even on 64-bit systems. For level 1/2 pages, ensure GFP_DMA32 is used if CONFIG_ZONE_DMA32 is defined (e.g. on arm64 platforms). For level 2 pages, allocate a slab cache in SLAB_CACHE_DMA32. Note that we do not explicitly pass GFP_DMA[32] to kmem_cache_zalloc, as this is not strictly necessary, and would cause a warning in mm/sl*b.c, as we did not update GFP_SLAB_BUG_MASK. Also, print an error when the physical address does not fit in 32-bit, to make debugging easier in the future. Link: http://lkml.kernel.org/r/20181210011504.122604-3-drinkcat@chromium.org Fixes: ad67f5a6545f ("arm64: replace ZONE_DMA with ZONE_DMA32") Signed-off-by: Nicolas Boichat Acked-by: Will Deacon Cc: Christoph Hellwig Cc: Christoph Lameter Cc: David Rientjes Cc: Hsin-Yi Wang Cc: Huaisheng Ye Cc: Joerg Roedel Cc: Joonsoo Kim Cc: Matthew Wilcox Cc: Matthias Brugger Cc: Mel Gorman Cc: Michal Hocko Cc: Mike Rapoport Cc: Pekka Enberg Cc: Robin Murphy Cc: Sasha Levin Cc: Tomasz Figa Cc: Vlastimil Babka Cc: Yingjoe Chen Cc: Yong Wu Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/iommu/io-pgtable-arm-v7s.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index f101afc315ab..9a8a8870e267 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -160,6 +160,14 @@ #define ARM_V7S_TCR_PD1 BIT(5) +#ifdef CONFIG_ZONE_DMA32 +#define ARM_V7S_TABLE_GFP_DMA GFP_DMA32 +#define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA32 +#else +#define ARM_V7S_TABLE_GFP_DMA GFP_DMA +#define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA +#endif + typedef u32 arm_v7s_iopte; static bool selftest_running; @@ -197,13 +205,16 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, void *table = NULL; if (lvl == 1) - table = (void *)__get_dma_pages(__GFP_ZERO, get_order(size)); + table = (void *)__get_free_pages( + __GFP_ZERO | ARM_V7S_TABLE_GFP_DMA, get_order(size)); else if (lvl == 2) - table = kmem_cache_zalloc(data->l2_tables, gfp | GFP_DMA); + table = kmem_cache_zalloc(data->l2_tables, gfp); phys = virt_to_phys(table); - if (phys != (arm_v7s_iopte)phys) + if (phys != (arm_v7s_iopte)phys) { /* Doesn't fit in PTE */ + dev_err(dev, "Page table does not fit in PTE: %pa", &phys); goto out_free; + } if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { dma = dma_map_single(dev, table, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) @@ -733,7 +744,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2", ARM_V7S_TABLE_SIZE(2), ARM_V7S_TABLE_SIZE(2), - SLAB_CACHE_DMA, NULL); + ARM_V7S_TABLE_SLAB_FLAGS, NULL); if (!data->l2_tables) goto out_free_data; -- cgit From a953e7721fa9999fd628885ed451e16641a23d1e Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Thu, 28 Mar 2019 20:43:51 -0700 Subject: include/linux/hugetlb.h: convert to use vm_fault_t kbuild produces the below warning: tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master head: 5453a3df2a5eb49bc24615d4cf0d66b2aae05e5f commit 3d3539018d2c ("mm: create the new vm_fault_t type") reproduce: # apt-get install sparse git checkout 3d3539018d2cbd12e5af4a132636ee7fd8d43ef0 make ARCH=x86_64 allmodconfig make C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' >> mm/memory.c:3968:21: sparse: incorrect type in assignment (different >> base types) @@ expected restricted vm_fault_t [usertype] ret @@ >> got e] ret @@ mm/memory.c:3968:21: expected restricted vm_fault_t [usertype] ret mm/memory.c:3968:21: got int This patch converts to return vm_fault_t type for hugetlb_fault() when CONFIG_HUGETLB_PAGE=n. Regarding the sparse warning, Luc said: : This is the expected behaviour. The constant 0 is magic regarding bitwise : types but ({ ...; 0; }) is not, it is just an ordinary expression of type : 'int'. : : So, IMHO, Souptick's patch is the right thing to do. Link: http://lkml.kernel.org/r/20190318162604.GA31553@jordon-HP-15-Notebook-PC Signed-off-by: Souptick Joarder Reviewed-by: Mike Kravetz Cc: Matthew Wilcox Cc: Luc Van Oostenryck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index ea35263eb76b..11943b60f208 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -203,7 +203,6 @@ static inline void hugetlb_show_meminfo(void) #define pud_huge(x) 0 #define is_hugepage_only_range(mm, addr, len) 0 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; }) -#define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; }) #define hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \ src_addr, pagep) ({ BUG(); 0; }) #define huge_pte_offset(mm, address, sz) 0 @@ -234,6 +233,13 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb, { BUG(); } +static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, + struct vm_area_struct *vma, unsigned long address, + unsigned int flags) +{ + BUG(); + return 0; +} #endif /* !CONFIG_HUGETLB_PAGE */ /* -- cgit From a7f40cfe3b7ada57af9b62fd28430eeb4a7cfcb7 Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Thu, 28 Mar 2019 20:43:55 -0700 Subject: mm: mempolicy: make mbind() return -EIO when MPOL_MF_STRICT is specified When MPOL_MF_STRICT was specified and an existing page was already on a node that does not follow the policy, mbind() should return -EIO. But commit 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()") broke the rule. And commit c8633798497c ("mm: mempolicy: mbind and migrate_pages support thp migration") didn't return the correct value for THP mbind() too. If MPOL_MF_STRICT is set, ignore vma_migratable() to make sure it reaches queue_pages_to_pte_range() or queue_pages_pmd() to check if an existing page was already on a node that does not follow the policy. And, non-migratable vma may be used, return -EIO too if MPOL_MF_MOVE or MPOL_MF_MOVE_ALL was specified. Tested with https://github.com/metan-ucw/ltp/blob/master/testcases/kernel/syscalls/mbind/mbind02.c [akpm@linux-foundation.org: tweak code comment] Link: http://lkml.kernel.org/r/1553020556-38583-1-git-send-email-yang.shi@linux.alibaba.com Fixes: 6f4576e3687b ("mempolicy: apply page table walker on queue_pages_range()") Signed-off-by: Yang Shi Signed-off-by: Oscar Salvador Reported-by: Cyril Hrubis Suggested-by: Kirill A. Shutemov Acked-by: Rafael Aquini Reviewed-by: Oscar Salvador Acked-by: David Rientjes Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 40 +++++++++++++++++++++++++++++++++------- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index af171ccb56a2..2219e747df49 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -428,6 +428,13 @@ static inline bool queue_pages_required(struct page *page, return node_isset(nid, *qp->nmask) == !(flags & MPOL_MF_INVERT); } +/* + * queue_pages_pmd() has three possible return values: + * 1 - pages are placed on the right node or queued successfully. + * 0 - THP was split. + * -EIO - is migration entry or MPOL_MF_STRICT was specified and an existing + * page was already on a node that does not follow the policy. + */ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, unsigned long end, struct mm_walk *walk) { @@ -437,7 +444,7 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, unsigned long flags; if (unlikely(is_pmd_migration_entry(*pmd))) { - ret = 1; + ret = -EIO; goto unlock; } page = pmd_page(*pmd); @@ -454,8 +461,15 @@ static int queue_pages_pmd(pmd_t *pmd, spinlock_t *ptl, unsigned long addr, ret = 1; flags = qp->flags; /* go to thp migration */ - if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { + if (!vma_migratable(walk->vma)) { + ret = -EIO; + goto unlock; + } + migrate_page_add(page, qp->pagelist, flags); + } else + ret = -EIO; unlock: spin_unlock(ptl); out: @@ -480,8 +494,10 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { ret = queue_pages_pmd(pmd, ptl, addr, end, walk); - if (ret) + if (ret > 0) return 0; + else if (ret < 0) + return ret; } if (pmd_trans_unstable(pmd)) @@ -502,11 +518,16 @@ static int queue_pages_pte_range(pmd_t *pmd, unsigned long addr, continue; if (!queue_pages_required(page, qp)) continue; - migrate_page_add(page, qp->pagelist, flags); + if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) { + if (!vma_migratable(vma)) + break; + migrate_page_add(page, qp->pagelist, flags); + } else + break; } pte_unmap_unlock(pte - 1, ptl); cond_resched(); - return 0; + return addr != end ? -EIO : 0; } static int queue_pages_hugetlb(pte_t *pte, unsigned long hmask, @@ -576,7 +597,12 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end, unsigned long endvma = vma->vm_end; unsigned long flags = qp->flags; - if (!vma_migratable(vma)) + /* + * Need check MPOL_MF_STRICT to return -EIO if possible + * regardless of vma_migratable + */ + if (!vma_migratable(vma) && + !(flags & MPOL_MF_STRICT)) return 1; if (endvma > end) @@ -603,7 +629,7 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end, } /* queue pages from current vma */ - if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) + if (flags & MPOL_MF_VALID) return 0; return 1; } -- cgit From 5ae2efb1dea9f537453e841714e3ee2757595aec Mon Sep 17 00:00:00 2001 From: Oscar Salvador Date: Thu, 28 Mar 2019 20:44:01 -0700 Subject: mm/debug.c: fix __dump_page when mapping->host is not set While debugging something, I added a dump_page() into do_swap_page(), and I got the splat from below. The issue happens when dereferencing mapping->host in __dump_page(): ... else if (mapping) { pr_warn("%ps ", mapping->a_ops); if (mapping->host->i_dentry.first) { struct dentry *dentry; dentry = container_of(mapping->host->i_dentry.first, struct dentry, d_u.d_alias); pr_warn("name:\"%pd\" ", dentry); } } ... Swap address space does not contain an inode information, and so mapping->host equals NULL. Although the dump_page() call was added artificially into do_swap_page(), I am not sure if we can hit this from any other path, so it looks worth fixing it. We can easily do that by checking mapping->host first. Link: http://lkml.kernel.org/r/20190318072931.29094-1-osalvador@suse.de Fixes: 1c6fb1d89e73c ("mm: print more information about mapping in __dump_page") Signed-off-by: Oscar Salvador Acked-by: Michal Hocko Acked-by: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/debug.c b/mm/debug.c index 45d9eb77b84e..eee9c221280c 100644 --- a/mm/debug.c +++ b/mm/debug.c @@ -79,7 +79,7 @@ void __dump_page(struct page *page, const char *reason) pr_warn("ksm "); else if (mapping) { pr_warn("%ps ", mapping->a_ops); - if (mapping->host->i_dentry.first) { + if (mapping->host && mapping->host->i_dentry.first) { struct dentry *dentry; dentry = container_of(mapping->host->i_dentry.first, struct dentry, d_u.d_alias); pr_warn("name:\"%pd\" ", dentry); -- cgit From b736523f0759d1debeb56f8e0c4c87a2bea0fb23 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 28 Mar 2019 20:44:05 -0700 Subject: include/linux/list.h: fix list_is_first() kernel-doc Fix typo of kernel-doc parameter notation (there should be no space between '@' and the parameter name). Also fixes bogus kernel-doc notation output formatting. Link: http://lkml.kernel.org/r/ddce8b80-9a8a-d52d-3546-87b2211c089a@infradead.org Fixes: 70b44595eafe9 ("mm, compaction: use free lists to quickly locate a migration source") Signed-off-by: Randy Dunlap Acked-by: Mel Gorman Reviewed-by: William Kucharski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/list.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/list.h b/include/linux/list.h index 79626b5ab36c..58aa3adf94e6 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -207,7 +207,7 @@ static inline void list_bulk_move_tail(struct list_head *head, } /** - * list_is_first -- tests whether @ list is the first entry in list @head + * list_is_first -- tests whether @list is the first entry in list @head * @list: the entry to test * @head: the head of the list */ -- cgit From eebf36480678f948b3ed15d56ca7b8e6194e7c18 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 28 Mar 2019 20:44:09 -0700 Subject: fs/proc/kcore.c: make kcore_modules static Fix sparse warning: fs/proc/kcore.c:591:19: warning: symbol 'kcore_modules' was not declared. Should it be static? Link: http://lkml.kernel.org/r/20190320135417.13272-1-yuehaibing@huawei.com Signed-off-by: YueHaibing Acked-by: Mukesh Ojha Cc: Alexey Dobriyan Cc: Omar Sandoval Cc: James Morse Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/kcore.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index d29d869abec1..f5834488b67d 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -615,7 +615,7 @@ static void __init proc_kcore_text_init(void) /* * MODULES_VADDR has no intersection with VMALLOC_ADDR. */ -struct kcore_list kcore_modules; +static struct kcore_list kcore_modules; static void __init add_modules_range(void) { if (MODULES_VADDR != VMALLOC_START && MODULES_END != VMALLOC_END) { -- cgit From fcfc2aa0185f4a731d05a21e9f359968fdfd02e7 Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Thu, 28 Mar 2019 20:44:13 -0700 Subject: ptrace: take into account saved_sigmask in PTRACE{GET,SET}SIGMASK There are a few system calls (pselect, ppoll, etc) which replace a task sigmask while they are running in a kernel-space When a task calls one of these syscalls, the kernel saves a current sigmask in task->saved_sigmask and sets a syscall sigmask. On syscall-exit-stop, ptrace traps a task before restoring the saved_sigmask, so PTRACE_GETSIGMASK returns the syscall sigmask and PTRACE_SETSIGMASK does nothing, because its sigmask is replaced by saved_sigmask, when the task returns to user-space. This patch fixes this problem. PTRACE_GETSIGMASK returns saved_sigmask if it's set. PTRACE_SETSIGMASK drops the TIF_RESTORE_SIGMASK flag. Link: http://lkml.kernel.org/r/20181120060616.6043-1-avagin@gmail.com Fixes: 29000caecbe8 ("ptrace: add ability to get/set signal-blocked mask") Signed-off-by: Andrei Vagin Acked-by: Oleg Nesterov Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched/signal.h | 18 ++++++++++++++++++ kernel/ptrace.c | 15 +++++++++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index ae5655197698..e412c092c1e8 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -418,10 +418,20 @@ static inline void set_restore_sigmask(void) set_thread_flag(TIF_RESTORE_SIGMASK); WARN_ON(!test_thread_flag(TIF_SIGPENDING)); } + +static inline void clear_tsk_restore_sigmask(struct task_struct *tsk) +{ + clear_tsk_thread_flag(tsk, TIF_RESTORE_SIGMASK); +} + static inline void clear_restore_sigmask(void) { clear_thread_flag(TIF_RESTORE_SIGMASK); } +static inline bool test_tsk_restore_sigmask(struct task_struct *tsk) +{ + return test_tsk_thread_flag(tsk, TIF_RESTORE_SIGMASK); +} static inline bool test_restore_sigmask(void) { return test_thread_flag(TIF_RESTORE_SIGMASK); @@ -439,6 +449,10 @@ static inline void set_restore_sigmask(void) current->restore_sigmask = true; WARN_ON(!test_thread_flag(TIF_SIGPENDING)); } +static inline void clear_tsk_restore_sigmask(struct task_struct *tsk) +{ + tsk->restore_sigmask = false; +} static inline void clear_restore_sigmask(void) { current->restore_sigmask = false; @@ -447,6 +461,10 @@ static inline bool test_restore_sigmask(void) { return current->restore_sigmask; } +static inline bool test_tsk_restore_sigmask(struct task_struct *tsk) +{ + return tsk->restore_sigmask; +} static inline bool test_and_clear_restore_sigmask(void) { if (!current->restore_sigmask) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 771e93f9c43f..6f357f4fc859 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -29,6 +29,7 @@ #include #include #include +#include /* * Access another process' address space via ptrace. @@ -924,18 +925,26 @@ int ptrace_request(struct task_struct *child, long request, ret = ptrace_setsiginfo(child, &siginfo); break; - case PTRACE_GETSIGMASK: + case PTRACE_GETSIGMASK: { + sigset_t *mask; + if (addr != sizeof(sigset_t)) { ret = -EINVAL; break; } - if (copy_to_user(datavp, &child->blocked, sizeof(sigset_t))) + if (test_tsk_restore_sigmask(child)) + mask = &child->saved_sigmask; + else + mask = &child->blocked; + + if (copy_to_user(datavp, mask, sizeof(sigset_t))) ret = -EFAULT; else ret = 0; break; + } case PTRACE_SETSIGMASK: { sigset_t new_set; @@ -961,6 +970,8 @@ int ptrace_request(struct task_struct *child, long request, child->blocked = new_set; spin_unlock_irq(&child->sighand->siglock); + clear_tsk_restore_sigmask(child); + ret = 0; break; } -- cgit From c4efe484b5f0d768e23c9731082fec827723e738 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 28 Mar 2019 20:44:16 -0700 Subject: mm/memory_hotplug.c: fix notification in offline error path When start_isolate_page_range() returned -EBUSY in __offline_pages(), it calls memory_notify(MEM_CANCEL_OFFLINE, &arg) with an uninitialized "arg". As the result, it triggers warnings below. Also, it is only necessary to notify MEM_CANCEL_OFFLINE after MEM_GOING_OFFLINE. page:ffffea0001200000 count:1 mapcount:0 mapping:0000000000000000 index:0x0 flags: 0x3fffe000001000(reserved) raw: 003fffe000001000 ffffea0001200008 ffffea0001200008 0000000000000000 raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000 page dumped because: unmovable page WARNING: CPU: 25 PID: 1665 at mm/kasan/common.c:665 kasan_mem_notifier+0x34/0x23b CPU: 25 PID: 1665 Comm: bash Tainted: G W 5.0.0+ #94 Hardware name: HP ProLiant DL180 Gen9/ProLiant DL180 Gen9, BIOS U20 10/25/2017 RIP: 0010:kasan_mem_notifier+0x34/0x23b RSP: 0018:ffff8883ec737890 EFLAGS: 00010206 RAX: 0000000000000246 RBX: ff10f0f4435f1000 RCX: f887a7a21af88000 RDX: dffffc0000000000 RSI: 0000000000000020 RDI: ffff8881f221af88 RBP: ffff8883ec737898 R08: ffff888000000000 R09: ffffffffb0bddcd0 R10: ffffed103e857088 R11: ffff8881f42b8443 R12: dffffc0000000000 R13: 00000000fffffff9 R14: dffffc0000000000 R15: 0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000560fbd31d730 CR3: 00000004049c6003 CR4: 00000000001606a0 Call Trace: notifier_call_chain+0xbf/0x130 __blocking_notifier_call_chain+0x76/0xc0 blocking_notifier_call_chain+0x16/0x20 memory_notify+0x1b/0x20 __offline_pages+0x3e2/0x1210 offline_pages+0x11/0x20 memory_block_action+0x144/0x300 memory_subsys_offline+0xe5/0x170 device_offline+0x13f/0x1e0 state_store+0xeb/0x110 dev_attr_store+0x3f/0x70 sysfs_kf_write+0x104/0x150 kernfs_fop_write+0x25c/0x410 __vfs_write+0x66/0x120 vfs_write+0x15a/0x4f0 ksys_write+0xd2/0x1b0 __x64_sys_write+0x73/0xb0 do_syscall_64+0xeb/0xb78 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x7f14f75cc3b8 RSP: 002b:00007ffe84d01d68 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000008 RCX: 00007f14f75cc3b8 RDX: 0000000000000008 RSI: 0000563f8e433d70 RDI: 0000000000000001 RBP: 0000563f8e433d70 R08: 000000000000000a R09: 00007ffe84d018f0 R10: 000000000000000a R11: 0000000000000246 R12: 00007f14f789e780 R13: 0000000000000008 R14: 00007f14f7899740 R15: 0000000000000008 Link: http://lkml.kernel.org/r/20190320204255.53571-1-cai@lca.pw Fixes: 7960509329c2 ("mm, memory_hotplug: print reason for the offlining failure") Reviewed-by: Oscar Salvador Acked-by: Michal Hocko Signed-off-by: Qian Cai Reviewed-by: Andrew Morton Cc: [5.0.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory_hotplug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 0e0a16021fd5..0082d699be94 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1699,12 +1699,12 @@ static int __ref __offline_pages(unsigned long start_pfn, failed_removal_isolated: undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE); + memory_notify(MEM_CANCEL_OFFLINE, &arg); failed_removal: pr_debug("memory offlining [mem %#010llx-%#010llx] failed due to %s\n", (unsigned long long) start_pfn << PAGE_SHIFT, ((unsigned long long) end_pfn << PAGE_SHIFT) - 1, reason); - memory_notify(MEM_CANCEL_OFFLINE, &arg); /* pushback to free area */ mem_hotplug_done(); return ret; -- cgit From f5777bc2d9cf0712554228b1a7927b6f13f5c1f0 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Thu, 28 Mar 2019 20:44:21 -0700 Subject: mm/page_isolation.c: fix a wrong flag in set_migratetype_isolate() Due to has_unmovable_pages() taking an incorrect irqsave flag instead of the isolation flag in set_migratetype_isolate(), there are issues with HWPOSION and error reporting where dump_page() is not called when there is an unmovable page. Link: http://lkml.kernel.org/r/20190320204941.53731-1-cai@lca.pw Fixes: d381c54760dc ("mm: only report isolation failures when offlining memory") Acked-by: Michal Hocko Reviewed-by: Oscar Salvador Signed-off-by: Qian Cai Cc: [5.0.x] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_isolation.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/page_isolation.c b/mm/page_isolation.c index bf4159d771c7..019280712e1b 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -59,7 +59,8 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_ * FIXME: Now, memory hotplug doesn't call shrink_slab() by itself. * We just check MOVABLE pages. */ - if (!has_unmovable_pages(zone, page, arg.pages_found, migratetype, flags)) + if (!has_unmovable_pages(zone, page, arg.pages_found, migratetype, + isol_flags)) ret = 0; /* -- cgit From 0bc9f5d14a93971c6cd9c0d81b0fc154fc54c65d Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Thu, 28 Mar 2019 20:44:24 -0700 Subject: drivers/block/zram/zram_drv.c: fix idle/writeback string compare Makoto report a below KASAN error: zram does out-of-bounds read. Because strscpy copies from source up to count bytes unconditionally. It could cause out-of-bounds read on next object in slab. To prevent it, use strlcpy which checks source's length automatically. BUG: KASAN: slab-out-of-bounds in strscpy+0x68/0x154 Read of size 8 at addr ffffffc0c3495a00 by task system_server/1314 .. Call trace: strscpy+0x68/0x154 idle_store+0xc4/0x34c dev_attr_store+0x50/0x6c sysfs_kf_write+0x98/0xb4 kernfs_fop_write+0x198/0x260 __vfs_write+0x10c/0x338 vfs_write+0x114/0x238 SyS_write+0xc8/0x168 __sys_trace_return+0x0/0x4 Allocated by task 1314: __kmalloc+0x280/0x318 kernfs_fop_write+0xac/0x260 __vfs_write+0x10c/0x338 vfs_write+0x114/0x238 SyS_write+0xc8/0x168 __sys_trace_return+0x0/0x4 Freed by task 2855: kfree+0x138/0x630 kernfs_put_open_node+0x10c/0x124 kernfs_fop_release+0xd8/0x114 __fput+0x130/0x2a4 ____fput+0x1c/0x28 task_work_run+0x16c/0x1c8 do_notify_resume+0x2bc/0x107c work_pending+0x8/0x10 The buggy address belongs to the object at ffffffc0c3495a00 which belongs to the cache kmalloc-128 of size 128 The buggy address is located 0 bytes inside of 128-byte region [ffffffc0c3495a00, ffffffc0c3495a80) The buggy address belongs to the page: page:ffffffbf030d2500 count:1 mapcount:0 mapping: (null) index:0x0 compound_mapcount: 0 flags: 0x4000000000010200(slab|head) page dumped because: kasan: bad access detected Memory state around the buggy address: ffffffc0c3495900: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffffffc0c3495980: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc >ffffffc0c3495a00: 04 fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ^ ffffffc0c3495a80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc ffffffc0c3495b00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 Link: http://lkml.kernel.org/r/20190319231911.145968-1-minchan@kernel.org Cc: [5.0] Signed-off-by: Minchan Kim Reported-by: Makoto Wu Reviewed-by: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/block/zram/zram_drv.c | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index e7a5f1d1c314..399cad7daae7 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -290,18 +290,8 @@ static ssize_t idle_store(struct device *dev, struct zram *zram = dev_to_zram(dev); unsigned long nr_pages = zram->disksize >> PAGE_SHIFT; int index; - char mode_buf[8]; - ssize_t sz; - sz = strscpy(mode_buf, buf, sizeof(mode_buf)); - if (sz <= 0) - return -EINVAL; - - /* ignore trailing new line */ - if (mode_buf[sz - 1] == '\n') - mode_buf[sz - 1] = 0x00; - - if (strcmp(mode_buf, "all")) + if (!sysfs_streq(buf, "all")) return -EINVAL; down_read(&zram->init_lock); @@ -635,25 +625,15 @@ static ssize_t writeback_store(struct device *dev, struct bio bio; struct bio_vec bio_vec; struct page *page; - ssize_t ret, sz; - char mode_buf[8]; - int mode = -1; + ssize_t ret; + int mode; unsigned long blk_idx = 0; - sz = strscpy(mode_buf, buf, sizeof(mode_buf)); - if (sz <= 0) - return -EINVAL; - - /* ignore trailing newline */ - if (mode_buf[sz - 1] == '\n') - mode_buf[sz - 1] = 0x00; - - if (!strcmp(mode_buf, "idle")) + if (sysfs_streq(buf, "idle")) mode = IDLE_WRITEBACK; - else if (!strcmp(mode_buf, "huge")) + else if (sysfs_streq(buf, "huge")) mode = HUGE_WRITEBACK; - - if (mode == -1) + else return -EINVAL; down_read(&zram->init_lock); -- cgit From d2b2c6dd227ba5b8a802858748ec9a780cb75b47 Mon Sep 17 00:00:00 2001 From: Lars Persson Date: Thu, 28 Mar 2019 20:44:28 -0700 Subject: mm/migrate.c: add missing flush_dcache_page for non-mapped page migrate Our MIPS 1004Kc SoCs were seeing random userspace crashes with SIGILL and SIGSEGV that could not be traced back to a userspace code bug. They had all the magic signs of an I/D cache coherency issue. Now recently we noticed that the /proc/sys/vm/compact_memory interface was quite efficient at provoking this class of userspace crashes. Studying the code in mm/migrate.c there is a distinction made between migrating a page that is mapped at the instant of migration and one that is not mapped. Our problem turned out to be the non-mapped pages. For the non-mapped page the code performs a copy of the page content and all relevant meta-data of the page without doing the required D-cache maintenance. This leaves dirty data in the D-cache of the CPU and on the 1004K cores this data is not visible to the I-cache. A subsequent page-fault that triggers a mapping of the page will happily serve the process with potentially stale code. What about ARM then, this bug should have seen greater exposure? Well ARM became immune to this flaw back in 2010, see commit c01778001a4f ("ARM: 6379/1: Assume new page cache pages have dirty D-cache"). My proposed fix moves the D-cache maintenance inside move_to_new_page to make it common for both cases. Link: http://lkml.kernel.org/r/20190315083502.11849-1-larper@axis.com Fixes: 97ee0524614 ("flush cache before installing new page at migraton") Signed-off-by: Lars Persson Reviewed-by: Paul Burton Acked-by: Mel Gorman Cc: Ralf Baechle Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/migrate.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/mm/migrate.c b/mm/migrate.c index ac6f4939bb59..663a5449367a 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -248,10 +248,8 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma, pte = swp_entry_to_pte(entry); } else if (is_device_public_page(new)) { pte = pte_mkdevmap(pte); - flush_dcache_page(new); } - } else - flush_dcache_page(new); + } #ifdef CONFIG_HUGETLB_PAGE if (PageHuge(new)) { @@ -995,6 +993,13 @@ static int move_to_new_page(struct page *newpage, struct page *page, */ if (!PageMappingFlags(page)) page->mapping = NULL; + + if (unlikely(is_zone_device_page(newpage))) { + if (is_device_public_page(newpage)) + flush_dcache_page(newpage); + } else + flush_dcache_page(newpage); + } out: return rc; -- cgit From 4462996ea3cc6bcf3c4efbd7bd2514a15dd8ece4 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 28 Mar 2019 20:44:32 -0700 Subject: checkpatch: add %pt as a valid vsprintf extension Commit 4d42c44727a0 ("lib/vsprintf: Print time and date in human readable format via %pt") introduced a new extension, %pt. Add it in the list of valid extensions. Link: http://lkml.kernel.org/r/20190314203719.29130-1-alexandre.belloni@bootlin.com Signed-off-by: Alexandre Belloni Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- scripts/checkpatch.pl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 5b756278df13..a09333fd7cef 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -5977,7 +5977,7 @@ sub process { while ($fmt =~ /(\%[\*\d\.]*p(\w))/g) { $specifier = $1; $extension = $2; - if ($extension !~ /[SsBKRraEhMmIiUDdgVCbGNOx]/) { + if ($extension !~ /[SsBKRraEhMmIiUDdgVCbGNOxt]/) { $bad_specifier = $specifier; last; } -- cgit From 2620327852478e695afb2eebe66c354b3bc456cc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 28 Mar 2019 20:44:36 -0700 Subject: fs: fs_parser: fix printk format warning Fix printk format warning (seen on i386 builds) by using ptrdiff format specifier (%t): fs/fs_parser.c:413:6: warning: format `%lu' expects argument of type `long unsigned int', but argument 3 has type `int' [-Wformat=] Link: http://lkml.kernel.org/r/19432668-ffd3-fbb2-af4f-1c8e48f6cc81@infradead.org Signed-off-by: Randy Dunlap Acked-by: Geert Uytterhoeven Cc: David Howells Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fs_parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/fs_parser.c b/fs/fs_parser.c index 842e8f749db6..570d71043acf 100644 --- a/fs/fs_parser.c +++ b/fs/fs_parser.c @@ -410,7 +410,7 @@ bool fs_validate_description(const struct fs_parameter_description *desc) for (param = desc->specs; param->name; param++) { if (param->opt == e->opt && param->type != fs_param_is_enum) { - pr_err("VALIDATE %s: e[%lu] enum val for %s\n", + pr_err("VALIDATE %s: e[%tu] enum val for %s\n", name, e - desc->enums, param->name); good = false; } -- cgit From 23da9588037ecdd4901db76a5b79a42b529c4ec3 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 28 Mar 2019 20:44:40 -0700 Subject: fs/proc/proc_sysctl.c: fix NULL pointer dereference in put_links Syzkaller reports: kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN PTI CPU: 1 PID: 5373 Comm: syz-executor.0 Not tainted 5.0.0-rc8+ #3 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 RIP: 0010:put_links+0x101/0x440 fs/proc/proc_sysctl.c:1599 Code: 00 0f 85 3a 03 00 00 48 8b 43 38 48 89 44 24 20 48 83 c0 38 48 89 c2 48 89 44 24 28 48 b8 00 00 00 00 00 fc ff df 48 c1 ea 03 <80> 3c 02 00 0f 85 fe 02 00 00 48 8b 74 24 20 48 c7 c7 60 2a 9d 91 RSP: 0018:ffff8881d828f238 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: ffff8881e01b1140 RCX: ffffffff8ee98267 RDX: 0000000000000007 RSI: ffffc90001479000 RDI: ffff8881e01b1178 RBP: dffffc0000000000 R08: ffffed103ee27259 R09: ffffed103ee27259 R10: 0000000000000001 R11: ffffed103ee27258 R12: fffffffffffffff4 R13: 0000000000000006 R14: ffff8881f59838c0 R15: dffffc0000000000 FS: 00007f072254f700(0000) GS:ffff8881f7100000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fff8b286668 CR3: 00000001f0542002 CR4: 00000000007606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 PKRU: 55555554 Call Trace: drop_sysctl_table+0x152/0x9f0 fs/proc/proc_sysctl.c:1629 get_subdir fs/proc/proc_sysctl.c:1022 [inline] __register_sysctl_table+0xd65/0x1090 fs/proc/proc_sysctl.c:1335 br_netfilter_init+0xbc/0x1000 [br_netfilter] do_one_initcall+0xfa/0x5ca init/main.c:887 do_init_module+0x204/0x5f6 kernel/module.c:3460 load_module+0x66b2/0x8570 kernel/module.c:3808 __do_sys_finit_module+0x238/0x2a0 kernel/module.c:3902 do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f072254ec58 EFLAGS: 00000246 ORIG_RAX: 0000000000000139 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000020000280 RDI: 0000000000000003 RBP: 00007f072254ec70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f072254f6bc R13: 00000000004bcefa R14: 00000000006f6fb0 R15: 0000000000000004 Modules linked in: br_netfilter(+) dvb_usb_dibusb_mc_common dib3000mc dibx000_common dvb_usb_dibusb_common dvb_usb_dw2102 dvb_usb classmate_laptop palmas_regulator cn videobuf2_v4l2 v4l2_common snd_soc_bd28623 mptbase snd_usb_usx2y snd_usbmidi_lib snd_rawmidi wmi libnvdimm lockd sunrpc grace rc_kworld_pc150u rc_core rtc_da9063 sha1_ssse3 i2c_cros_ec_tunnel adxl34x_spi adxl34x nfnetlink lib80211 i5500_temp dvb_as102 dvb_core videobuf2_common videodev media videobuf2_vmalloc videobuf2_memops udc_core lnbp22 leds_lp3952 hid_roccat_ryos s1d13xxxfb mtd vport_geneve openvswitch nf_conncount nf_nat_ipv6 nsh geneve udp_tunnel ip6_udp_tunnel snd_soc_mt6351 sis_agp phylink snd_soc_adau1761_spi snd_soc_adau1761 snd_soc_adau17x1 snd_soc_core snd_pcm_dmaengine ac97_bus snd_compress snd_soc_adau_utils snd_soc_sigmadsp_regmap snd_soc_sigmadsp raid_class hid_roccat_konepure hid_roccat_common hid_roccat c2port_duramar2150 core mdio_bcm_unimac iptable_security iptable_raw iptable_mangle iptable_nat nf_nat_ipv4 nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 iptable_filter bpfilter ip6_vti ip_vti ip_gre ipip sit tunnel4 ip_tunnel hsr veth netdevsim devlink vxcan batman_adv cfg80211 rfkill chnl_net caif nlmon dummy team bonding vcan bridge stp llc ip6_gre gre ip6_tunnel tunnel6 tun crct10dif_pclmul crc32_pclmul crc32c_intel ghash_clmulni_intel joydev mousedev ide_pci_generic piix aesni_intel aes_x86_64 ide_core crypto_simd atkbd cryptd glue_helper serio_raw ata_generic pata_acpi i2c_piix4 floppy sch_fq_codel ip_tables x_tables ipv6 [last unloaded: lm73] Dumping ftrace buffer: (ftrace buffer empty) ---[ end trace 770020de38961fd0 ]--- A new dir entry can be created in get_subdir and its 'header->parent' is set to NULL. Only after insert_header success, it will be set to 'dir', otherwise 'header->parent' is set to NULL and drop_sysctl_table is called. However in err handling path of get_subdir, drop_sysctl_table also be called on 'new->header' regardless its value of parent pointer. Then put_links is called, which triggers NULL-ptr deref when access member of header->parent. In fact we have multiple error paths which call drop_sysctl_table() there, upon failure on insert_links() we also call drop_sysctl_table().And even in the successful case on __register_sysctl_table() we still always call drop_sysctl_table().This patch fix it. Link: http://lkml.kernel.org/r/20190314085527.13244-1-yuehaibing@huawei.com Fixes: 0e47c99d7fe25 ("sysctl: Replace root_list with links between sysctl_table_sets") Signed-off-by: YueHaibing Reported-by: Hulk Robot Acked-by: Luis Chamberlain Cc: Kees Cook Cc: Alexey Dobriyan Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Al Viro Cc: Eric W. Biederman Cc: [3.4+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/proc_sysctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 4d598a399bbf..d65390727541 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -1626,7 +1626,8 @@ static void drop_sysctl_table(struct ctl_table_header *header) if (--header->nreg) return; - put_links(header); + if (parent) + put_links(header); start_unregistering(header); if (!--header->count) kfree_rcu(header, rcu); -- cgit From 2623c4fbe2ad1341ff2d1e12410d0afdae2490ca Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 29 Mar 2019 12:36:04 -0700 Subject: LSM: Revive CONFIG_DEFAULT_SECURITY_* for "make oldconfig" Commit 70b62c25665f636c ("LoadPin: Initialize as ordered LSM") removed CONFIG_DEFAULT_SECURITY_{SELINUX,SMACK,TOMOYO,APPARMOR,DAC} from security/Kconfig and changed CONFIG_LSM to provide a fixed ordering as a default value. That commit expected that existing users (upgrading from Linux 5.0 and earlier) will edit CONFIG_LSM value in accordance with their CONFIG_DEFAULT_SECURITY_* choice in their old kernel configs. But since users might forget to edit CONFIG_LSM value, this patch revives the choice (only for providing the default value for CONFIG_LSM) in order to make sure that CONFIG_LSM reflects CONFIG_DEFAULT_SECURITY_* from their old kernel configs. Note that since TOMOYO can be fully stacked against the other legacy major LSMs, when it is selected, it explicitly disables the other LSMs to avoid them also initializing since TOMOYO does not expect this currently. Reported-by: Jakub Kicinski Reported-by: Randy Dunlap Fixes: 70b62c25665f636c ("LoadPin: Initialize as ordered LSM") Co-developed-by: Tetsuo Handa Signed-off-by: Tetsuo Handa Signed-off-by: Kees Cook Acked-by: Casey Schaufler Signed-off-by: James Morris --- security/Kconfig | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/security/Kconfig b/security/Kconfig index 1d6463fb1450..353cfef71d4e 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -239,8 +239,46 @@ source "security/safesetid/Kconfig" source "security/integrity/Kconfig" +choice + prompt "First legacy 'major LSM' to be initialized" + default DEFAULT_SECURITY_SELINUX if SECURITY_SELINUX + default DEFAULT_SECURITY_SMACK if SECURITY_SMACK + default DEFAULT_SECURITY_TOMOYO if SECURITY_TOMOYO + default DEFAULT_SECURITY_APPARMOR if SECURITY_APPARMOR + default DEFAULT_SECURITY_DAC + + help + This choice is there only for converting CONFIG_DEFAULT_SECURITY + in old kernel configs to CONFIG_LSM in new kernel configs. Don't + change this choice unless you are creating a fresh kernel config, + for this choice will be ignored after CONFIG_LSM has been set. + + Selects the legacy "major security module" that will be + initialized first. Overridden by non-default CONFIG_LSM. + + config DEFAULT_SECURITY_SELINUX + bool "SELinux" if SECURITY_SELINUX=y + + config DEFAULT_SECURITY_SMACK + bool "Simplified Mandatory Access Control" if SECURITY_SMACK=y + + config DEFAULT_SECURITY_TOMOYO + bool "TOMOYO" if SECURITY_TOMOYO=y + + config DEFAULT_SECURITY_APPARMOR + bool "AppArmor" if SECURITY_APPARMOR=y + + config DEFAULT_SECURITY_DAC + bool "Unix Discretionary Access Controls" + +endchoice + config LSM string "Ordered list of enabled LSMs" + default "yama,loadpin,safesetid,integrity,smack,selinux,tomoyo,apparmor" if DEFAULT_SECURITY_SMACK + default "yama,loadpin,safesetid,integrity,apparmor,selinux,smack,tomoyo" if DEFAULT_SECURITY_APPARMOR + default "yama,loadpin,safesetid,integrity,tomoyo" if DEFAULT_SECURITY_TOMOYO + default "yama,loadpin,safesetid,integrity" if DEFAULT_SECURITY_DAC default "yama,loadpin,safesetid,integrity,selinux,smack,tomoyo,apparmor" help A comma-separated list of LSMs, in initialization order. -- cgit From 0aab8e4df4702b31314a27ec4b0631dfad0fae0a Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Sat, 9 Mar 2019 00:04:11 -0600 Subject: leds: pca9532: fix a potential NULL pointer dereference In case of_match_device cannot find a match, return -EINVAL to avoid NULL pointer dereference. Fixes: fa4191a609f2 ("leds: pca9532: Add device tree support") Signed-off-by: Kangjie Lu Signed-off-by: Jacek Anaszewski --- drivers/leds/leds-pca9532.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/leds/leds-pca9532.c b/drivers/leds/leds-pca9532.c index 7fea18b0c15d..7cb4d685a1f1 100644 --- a/drivers/leds/leds-pca9532.c +++ b/drivers/leds/leds-pca9532.c @@ -513,6 +513,7 @@ static int pca9532_probe(struct i2c_client *client, const struct i2c_device_id *id) { int devid; + const struct of_device_id *of_id; struct pca9532_data *data = i2c_get_clientdata(client); struct pca9532_platform_data *pca9532_pdata = dev_get_platdata(&client->dev); @@ -528,8 +529,11 @@ static int pca9532_probe(struct i2c_client *client, dev_err(&client->dev, "no platform data\n"); return -EINVAL; } - devid = (int)(uintptr_t)of_match_device( - of_pca9532_leds_match, &client->dev)->data; + of_id = of_match_device(of_pca9532_leds_match, + &client->dev); + if (unlikely(!of_id)) + return -EINVAL; + devid = (int)(uintptr_t) of_id->data; } else { devid = id->driver_data; } -- cgit From 909346433064b8d840dc82af26161926b8d37558 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 14 Mar 2019 15:06:14 +0100 Subject: leds: trigger: netdev: use memcpy in device_name_store If userspace doesn't end the input with a newline (which can easily happen if the write happens from a C program that does write(fd, iface, strlen(iface))), we may end up including garbage from a previous, longer value in the device_name. For example # cat device_name # printf 'eth12' > device_name # cat device_name eth12 # printf 'eth3' > device_name # cat device_name eth32 I highly doubt anybody is relying on this behaviour, so switch to simply copying the bytes (we've already checked that size is < IFNAMSIZ) and unconditionally zero-terminate it; of course, we also still have to strip a trailing newline. This is also preparation for future patches. Fixes: 06f502f57d0d ("leds: trigger: Introduce a NETDEV trigger") Signed-off-by: Rasmus Villemoes Acked-by: Pavel Machek Signed-off-by: Jacek Anaszewski --- drivers/leds/trigger/ledtrig-netdev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/leds/trigger/ledtrig-netdev.c b/drivers/leds/trigger/ledtrig-netdev.c index 167a94c02d05..136f86a1627d 100644 --- a/drivers/leds/trigger/ledtrig-netdev.c +++ b/drivers/leds/trigger/ledtrig-netdev.c @@ -122,7 +122,8 @@ static ssize_t device_name_store(struct device *dev, trigger_data->net_dev = NULL; } - strncpy(trigger_data->device_name, buf, size); + memcpy(trigger_data->device_name, buf, size); + trigger_data->device_name[size] = 0; if (size > 0 && trigger_data->device_name[size - 1] == '\n') trigger_data->device_name[size - 1] = 0; -- cgit From 79a3aaa7b82e3106be97842dedfd8429248896e6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 31 Mar 2019 14:39:29 -0700 Subject: Linux 5.1-rc3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 52f067eadc48..026fbc450906 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 1 SUBLEVEL = 0 -EXTRAVERSION = -rc2 +EXTRAVERSION = -rc3 NAME = Shy Crocodile # *DOCUMENTATION* -- cgit From 817b4d64da036f5559297a2fdb82b8b14f4ffdcd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 18 Mar 2019 23:00:54 +0300 Subject: ACPI / utils: Introduce acpi_dev_get_first_match_dev() helper The acpi_dev_get_first_match_name() is missing put_device() call and thus keeping reference counting unbalanced. In order to fix the issue introduce a new helper to convert existing users one-by-one to a better API. Signed-off-by: Andy Shevchenko Reviewed-by: Hans de Goede Reviewed-by: Mika Westerberg Acked-by: Mark Brown Signed-off-by: Rafael J. Wysocki --- drivers/acpi/utils.c | 24 ++++++++++++++++++++++-- include/acpi/acpi_bus.h | 3 +++ include/linux/acpi.h | 6 ++++++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index c4b06cc075f9..5a2bae2b6c3a 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -739,6 +739,7 @@ EXPORT_SYMBOL(acpi_dev_found); struct acpi_dev_match_info { const char *dev_name; + struct acpi_device *adev; struct acpi_device_id hid[2]; const char *uid; s64 hrv; @@ -759,6 +760,7 @@ static int acpi_dev_match_cb(struct device *dev, void *data) return 0; match->dev_name = acpi_dev_name(adev); + match->adev = adev; if (match->hrv == -1) return 1; @@ -806,16 +808,34 @@ bool acpi_dev_present(const char *hid, const char *uid, s64 hrv) EXPORT_SYMBOL(acpi_dev_present); /** - * acpi_dev_get_first_match_name - Return name of first match of ACPI device + * acpi_dev_get_first_match_dev - Return the first match of ACPI device * @hid: Hardware ID of the device. * @uid: Unique ID of the device, pass NULL to not check _UID * @hrv: Hardware Revision of the device, pass -1 to not check _HRV * - * Return device name if a matching device was present + * Return the first match of ACPI device if a matching device was present * at the moment of invocation, or NULL otherwise. * + * The caller is responsible to call put_device() on the returned device. + * * See additional information in acpi_dev_present() as well. */ +struct acpi_device * +acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv) +{ + struct acpi_dev_match_info match = {}; + struct device *dev; + + strlcpy(match.hid[0].id, hid, sizeof(match.hid[0].id)); + match.uid = uid; + match.hrv = hrv; + + dev = bus_find_device(&acpi_bus_type, NULL, &match, acpi_dev_match_cb); + return dev ? match.adev : NULL; +} +EXPORT_SYMBOL(acpi_dev_get_first_match_dev); + +/* DEPRECATED, use acpi_dev_get_first_match_dev() instead */ const char * acpi_dev_get_first_match_name(const char *hid, const char *uid, s64 hrv) { diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 0300374101cd..2063e9e2f384 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -91,6 +91,9 @@ acpi_evaluate_dsm_typed(acpi_handle handle, const guid_t *guid, u64 rev, bool acpi_dev_found(const char *hid); bool acpi_dev_present(const char *hid, const char *uid, s64 hrv); +struct acpi_device * +acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv); + const char * acpi_dev_get_first_match_name(const char *hid, const char *uid, s64 hrv); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index d5dcebd7aad3..3e1d16b00513 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -669,6 +669,12 @@ static inline bool acpi_dev_present(const char *hid, const char *uid, s64 hrv) return false; } +static inline struct acpi_device * +acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv) +{ + return NULL; +} + static inline const char * acpi_dev_get_first_match_name(const char *hid, const char *uid, s64 hrv) { -- cgit From 0cf064db948aca1e760fc513594536f761dee1cd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 28 Mar 2019 19:17:21 +0200 Subject: extcon: axp288: Convert to use acpi_dev_get_first_match_dev() acpi_dev_get_first_match_name() is deprecated and going to be removed because it leaks a reference. Convert the driver to use acpi_dev_get_first_match_dev() instead. Signed-off-by: Andy Shevchenko Acked-by: Chanwoo Choi Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- drivers/extcon/extcon-axp288.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/extcon/extcon-axp288.c b/drivers/extcon/extcon-axp288.c index a983708b77a6..50f9402fb325 100644 --- a/drivers/extcon/extcon-axp288.c +++ b/drivers/extcon/extcon-axp288.c @@ -333,7 +333,7 @@ static int axp288_extcon_probe(struct platform_device *pdev) struct axp288_extcon_info *info; struct axp20x_dev *axp20x = dev_get_drvdata(pdev->dev.parent); struct device *dev = &pdev->dev; - const char *name; + struct acpi_device *adev; int ret, i, pirq; info = devm_kzalloc(&pdev->dev, sizeof(*info), GFP_KERNEL); @@ -357,9 +357,10 @@ static int axp288_extcon_probe(struct platform_device *pdev) if (ret) return ret; - name = acpi_dev_get_first_match_name("INT3496", NULL, -1); - if (name) { - info->id_extcon = extcon_get_extcon_dev(name); + adev = acpi_dev_get_first_match_dev("INT3496", NULL, -1); + if (adev) { + info->id_extcon = extcon_get_extcon_dev(acpi_dev_name(adev)); + put_device(&adev->dev); if (!info->id_extcon) return -EPROBE_DEFER; -- cgit From d00d2109c3679bf87d412b1667bcb6d42c1ac12f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 28 Mar 2019 19:17:22 +0200 Subject: gpio: merrifield: Convert to use acpi_dev_get_first_match_dev() acpi_dev_get_first_match_name() is deprecated and going to be removed because it leaks a reference. Convert the driver to use acpi_dev_get_first_match_dev() instead. Signed-off-by: Andy Shevchenko Reviewed-by: Hans de Goede Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- drivers/gpio/gpio-merrifield.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/drivers/gpio/gpio-merrifield.c b/drivers/gpio/gpio-merrifield.c index 7c659fdaa6d5..2383dc78b123 100644 --- a/drivers/gpio/gpio-merrifield.c +++ b/drivers/gpio/gpio-merrifield.c @@ -377,10 +377,20 @@ static void mrfld_irq_init_hw(struct mrfld_gpio *priv) } } -static const char *mrfld_gpio_get_pinctrl_dev_name(void) +static const char *mrfld_gpio_get_pinctrl_dev_name(struct mrfld_gpio *priv) { - const char *dev_name = acpi_dev_get_first_match_name("INTC1002", NULL, -1); - return dev_name ? dev_name : "pinctrl-merrifield"; + struct acpi_device *adev; + const char *name; + + adev = acpi_dev_get_first_match_dev("INTC1002", NULL, -1); + if (adev) { + name = devm_kstrdup(priv->dev, acpi_dev_name(adev), GFP_KERNEL); + put_device(&adev->dev); + } else { + name = "pinctrl-merrifield"; + } + + return name; } static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id) @@ -441,7 +451,7 @@ static int mrfld_gpio_probe(struct pci_dev *pdev, const struct pci_device_id *id return retval; } - pinctrl_dev_name = mrfld_gpio_get_pinctrl_dev_name(); + pinctrl_dev_name = mrfld_gpio_get_pinctrl_dev_name(priv); for (i = 0; i < ARRAY_SIZE(mrfld_gpio_ranges); i++) { range = &mrfld_gpio_ranges[i]; retval = gpiochip_add_pin_range(&priv->chip, -- cgit From 1b55f1c6fd64efd7b1339664edc1222ad99f9c9b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 28 Mar 2019 19:17:23 +0200 Subject: ASoC: Intel: bytcht_da7213: Convert to use acpi_dev_get_first_match_dev() acpi_dev_get_first_match_name() is deprecated and going to be removed because it leaks a reference. Convert the driver to use acpi_dev_get_first_match_dev() instead. Signed-off-by: Andy Shevchenko Acked-by: Pierre-Louis Bossart Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- sound/soc/intel/boards/bytcht_da7213.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sound/soc/intel/boards/bytcht_da7213.c b/sound/soc/intel/boards/bytcht_da7213.c index b8e884803777..4decba338156 100644 --- a/sound/soc/intel/boards/bytcht_da7213.c +++ b/sound/soc/intel/boards/bytcht_da7213.c @@ -226,7 +226,7 @@ static int bytcht_da7213_probe(struct platform_device *pdev) struct snd_soc_card *card; struct snd_soc_acpi_mach *mach; const char *platform_name; - const char *i2c_name = NULL; + struct acpi_device *adev; int dai_index = 0; int ret_val = 0; int i; @@ -244,10 +244,11 @@ static int bytcht_da7213_probe(struct platform_device *pdev) } /* fixup codec name based on HID */ - i2c_name = acpi_dev_get_first_match_name(mach->id, NULL, -1); - if (i2c_name) { + adev = acpi_dev_get_first_match_dev(mach->id, NULL, -1); + if (adev) { snprintf(codec_name, sizeof(codec_name), - "%s%s", "i2c-", i2c_name); + "i2c-%s", acpi_dev_name(adev)); + put_device(&adev->dev); dailink[dai_index].codec_name = codec_name; } -- cgit From 645056da677082811b978f6285bf1ec0be947340 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 28 Mar 2019 19:17:24 +0200 Subject: ASoC: Intel: bytcht_es8316: Convert to use acpi_dev_get_first_match_dev() acpi_dev_get_first_match_name() is deprecated and going to be removed because it leaks a reference. Convert the driver to use acpi_dev_get_first_match_dev() instead. Signed-off-by: Andy Shevchenko Acked-by: Pierre-Louis Bossart Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- sound/soc/intel/boards/bytcht_es8316.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sound/soc/intel/boards/bytcht_es8316.c b/sound/soc/intel/boards/bytcht_es8316.c index d2a7e6ba11ae..6937c00cf63d 100644 --- a/sound/soc/intel/boards/bytcht_es8316.c +++ b/sound/soc/intel/boards/bytcht_es8316.c @@ -442,7 +442,7 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct snd_soc_acpi_mach *mach; const char *platform_name; - const char *i2c_name = NULL; + struct acpi_device *adev; struct device *codec_dev; int dai_index = 0; int i; @@ -463,10 +463,11 @@ static int snd_byt_cht_es8316_mc_probe(struct platform_device *pdev) } /* fixup codec name based on HID */ - i2c_name = acpi_dev_get_first_match_name(mach->id, NULL, -1); - if (i2c_name) { + adev = acpi_dev_get_first_match_dev(mach->id, NULL, -1); + if (adev) { snprintf(codec_name, sizeof(codec_name), - "%s%s", "i2c-", i2c_name); + "i2c-%s", acpi_dev_name(adev)); + put_device(&adev->dev); byt_cht_es8316_dais[dai_index].codec_name = codec_name; } -- cgit From a320d89e67d6a08af18603b538021087a41bb182 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 28 Mar 2019 19:17:25 +0200 Subject: ASoC: Intel: bytcr_rt5640: Convert to use acpi_dev_get_first_match_dev() acpi_dev_get_first_match_name() is deprecated and going to be removed because it leaks a reference. Convert the driver to use acpi_dev_get_first_match_dev() instead. Signed-off-by: Andy Shevchenko Acked-by: Pierre-Louis Bossart Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- sound/soc/intel/boards/bytcr_rt5640.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c index 940eb27158da..f9175cf6747e 100644 --- a/sound/soc/intel/boards/bytcr_rt5640.c +++ b/sound/soc/intel/boards/bytcr_rt5640.c @@ -1154,7 +1154,7 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) struct byt_rt5640_private *priv; struct snd_soc_acpi_mach *mach; const char *platform_name; - const char *i2c_name = NULL; + struct acpi_device *adev; int ret_val = 0; int dai_index = 0; int i; @@ -1178,11 +1178,11 @@ static int snd_byt_rt5640_mc_probe(struct platform_device *pdev) } /* fixup codec name based on HID */ - i2c_name = acpi_dev_get_first_match_name(mach->id, NULL, -1); - if (i2c_name) { + adev = acpi_dev_get_first_match_dev(mach->id, NULL, -1); + if (adev) { snprintf(byt_rt5640_codec_name, sizeof(byt_rt5640_codec_name), - "%s%s", "i2c-", i2c_name); - + "i2c-%s", acpi_dev_name(adev)); + put_device(&adev->dev); byt_rt5640_dais[dai_index].codec_name = byt_rt5640_codec_name; } -- cgit From 7075e9babb5db302907cf32b1db688eb83c85f77 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 28 Mar 2019 19:17:26 +0200 Subject: ASoC: Intel: bytcr_rt5651: Convert to use acpi_dev_get_first_match_dev() acpi_dev_get_first_match_name() is deprecated and going to be removed because it leaks a reference. Convert the driver to use acpi_dev_get_first_match_dev() instead. Signed-off-by: Andy Shevchenko Acked-by: Pierre-Louis Bossart Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- sound/soc/intel/boards/bytcr_rt5651.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sound/soc/intel/boards/bytcr_rt5651.c b/sound/soc/intel/boards/bytcr_rt5651.c index b0a4d297176e..b744add01d12 100644 --- a/sound/soc/intel/boards/bytcr_rt5651.c +++ b/sound/soc/intel/boards/bytcr_rt5651.c @@ -867,8 +867,8 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) struct byt_rt5651_private *priv; struct snd_soc_acpi_mach *mach; const char *platform_name; + struct acpi_device *adev; struct device *codec_dev; - const char *i2c_name = NULL; const char *hp_swapped; bool is_bytcr = false; int ret_val = 0; @@ -894,14 +894,16 @@ static int snd_byt_rt5651_mc_probe(struct platform_device *pdev) } /* fixup codec name based on HID */ - i2c_name = acpi_dev_get_first_match_name(mach->id, NULL, -1); - if (!i2c_name) { + adev = acpi_dev_get_first_match_dev(mach->id, NULL, -1); + if (adev) { + snprintf(byt_rt5651_codec_name, sizeof(byt_rt5651_codec_name), + "i2c-%s", acpi_dev_name(adev)); + put_device(&adev->dev); + byt_rt5651_dais[dai_index].codec_name = byt_rt5651_codec_name; + } else { dev_err(&pdev->dev, "Error cannot find '%s' dev\n", mach->id); return -ENODEV; } - snprintf(byt_rt5651_codec_name, sizeof(byt_rt5651_codec_name), - "%s%s", "i2c-", i2c_name); - byt_rt5651_dais[dai_index].codec_name = byt_rt5651_codec_name; codec_dev = bus_find_device_by_name(&i2c_bus_type, NULL, byt_rt5651_codec_name); -- cgit From fe4c283a79db91ac0d4402a363024789275c3fd1 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 28 Mar 2019 19:17:27 +0200 Subject: ASoC: Intel: cht_bsw_rt5645: Convert to use acpi_dev_get_first_match_dev() acpi_dev_get_first_match_name() is deprecated and going to be removed because it leaks a reference. Convert the driver to use acpi_dev_get_first_match_dev() instead. Signed-off-by: Andy Shevchenko Acked-by: Pierre-Louis Bossart Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- sound/soc/intel/boards/cht_bsw_rt5645.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sound/soc/intel/boards/cht_bsw_rt5645.c b/sound/soc/intel/boards/cht_bsw_rt5645.c index cbc2d458483f..32dbeaf1ab94 100644 --- a/sound/soc/intel/boards/cht_bsw_rt5645.c +++ b/sound/soc/intel/boards/cht_bsw_rt5645.c @@ -532,7 +532,7 @@ static int snd_cht_mc_probe(struct platform_device *pdev) struct snd_soc_acpi_mach *mach; const char *platform_name; struct cht_mc_private *drv; - const char *i2c_name = NULL; + struct acpi_device *adev; bool found = false; bool is_bytcr = false; int dai_index = 0; @@ -573,10 +573,11 @@ static int snd_cht_mc_probe(struct platform_device *pdev) } /* fixup codec name based on HID */ - i2c_name = acpi_dev_get_first_match_name(mach->id, NULL, -1); - if (i2c_name) { + adev = acpi_dev_get_first_match_dev(mach->id, NULL, -1); + if (adev) { snprintf(cht_rt5645_codec_name, sizeof(cht_rt5645_codec_name), - "%s%s", "i2c-", i2c_name); + "i2c-%s", acpi_dev_name(adev)); + put_device(&adev->dev); cht_dailink[dai_index].codec_name = cht_rt5645_codec_name; } -- cgit From b664e6fe2225972e0eb3df0bb8dbedd1a0fc641f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 28 Mar 2019 19:17:28 +0200 Subject: ASoC: Intel: cht_bsw_rt5672: Convert to use acpi_dev_get_first_match_dev() acpi_dev_get_first_match_name() is deprecated and going to be removed because it leaks a reference. Convert the driver to use acpi_dev_get_first_match_dev() instead. Signed-off-by: Andy Shevchenko Acked-by: Pierre-Louis Bossart Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- sound/soc/intel/boards/cht_bsw_rt5672.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/sound/soc/intel/boards/cht_bsw_rt5672.c b/sound/soc/intel/boards/cht_bsw_rt5672.c index 3d5a2b3a06f0..0f7770822388 100644 --- a/sound/soc/intel/boards/cht_bsw_rt5672.c +++ b/sound/soc/intel/boards/cht_bsw_rt5672.c @@ -401,7 +401,7 @@ static int snd_cht_mc_probe(struct platform_device *pdev) struct cht_mc_private *drv; struct snd_soc_acpi_mach *mach = pdev->dev.platform_data; const char *platform_name; - const char *i2c_name; + struct acpi_device *adev; int i; drv = devm_kzalloc(&pdev->dev, sizeof(*drv), GFP_KERNEL); @@ -411,10 +411,11 @@ static int snd_cht_mc_probe(struct platform_device *pdev) strcpy(drv->codec_name, RT5672_I2C_DEFAULT); /* fixup codec name based on HID */ - i2c_name = acpi_dev_get_first_match_name(mach->id, NULL, -1); - if (i2c_name) { + adev = acpi_dev_get_first_match_dev(mach->id, NULL, -1); + if (adev) { snprintf(drv->codec_name, sizeof(drv->codec_name), - "i2c-%s", i2c_name); + "i2c-%s", acpi_dev_name(adev)); + put_device(&adev->dev); for (i = 0; i < ARRAY_SIZE(cht_dailink); i++) { if (!strcmp(cht_dailink[i].codec_name, RT5672_I2C_DEFAULT)) { -- cgit From 257f9053c0204ea47491aa236004fd1226f75fa8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 28 Mar 2019 19:17:29 +0200 Subject: ACPI / utils: Remove deprecated function since no user left There is no more user of acpi_dev_get_first_match_name(), which is deprecated and has no user left, so, remove it for good. Signed-off-by: Andy Shevchenko Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- drivers/acpi/utils.c | 16 ---------------- include/acpi/acpi_bus.h | 3 --- include/linux/acpi.h | 6 ------ 3 files changed, 25 deletions(-) diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c index 5a2bae2b6c3a..89363b245489 100644 --- a/drivers/acpi/utils.c +++ b/drivers/acpi/utils.c @@ -835,22 +835,6 @@ acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv) } EXPORT_SYMBOL(acpi_dev_get_first_match_dev); -/* DEPRECATED, use acpi_dev_get_first_match_dev() instead */ -const char * -acpi_dev_get_first_match_name(const char *hid, const char *uid, s64 hrv) -{ - struct acpi_dev_match_info match = {}; - struct device *dev; - - strlcpy(match.hid[0].id, hid, sizeof(match.hid[0].id)); - match.uid = uid; - match.hrv = hrv; - - dev = bus_find_device(&acpi_bus_type, NULL, &match, acpi_dev_match_cb); - return dev ? match.dev_name : NULL; -} -EXPORT_SYMBOL(acpi_dev_get_first_match_name); - /* * acpi_backlight= handling, this is done here rather then in video_detect.c * because __setup cannot be used in modules. diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 2063e9e2f384..f7981751ac77 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -94,9 +94,6 @@ bool acpi_dev_present(const char *hid, const char *uid, s64 hrv); struct acpi_device * acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv); -const char * -acpi_dev_get_first_match_name(const char *hid, const char *uid, s64 hrv); - #ifdef CONFIG_ACPI #include diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 3e1d16b00513..392413075cc0 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -675,12 +675,6 @@ acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv) return NULL; } -static inline const char * -acpi_dev_get_first_match_name(const char *hid, const char *uid, s64 hrv) -{ - return NULL; -} - static inline bool is_acpi_node(struct fwnode_handle *fwnode) { return false; -- cgit