From 28a7eedd1109c9277390f44aa11de76b673996cd Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Mon, 1 Jun 2015 15:00:44 -0700 Subject: memory: omap-gpmc: Fix parsing of devices We currently artificially limit the parsing of GPMC connected devices based on the device name. Let's stop doing that, it's confusing as adding devices to .dts files with using normal names like fpga and usb will currently cause them to not probe. Cc: Roger Quadros Reported-by: Brian Hutchinson Signed-off-by: Tony Lindgren --- drivers/memory/omap-gpmc.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/memory/omap-gpmc.c b/drivers/memory/omap-gpmc.c index c94ea0d68746..0e524a1de56d 100644 --- a/drivers/memory/omap-gpmc.c +++ b/drivers/memory/omap-gpmc.c @@ -2074,14 +2074,8 @@ static int gpmc_probe_dt(struct platform_device *pdev) ret = gpmc_probe_nand_child(pdev, child); else if (of_node_cmp(child->name, "onenand") == 0) ret = gpmc_probe_onenand_child(pdev, child); - else if (of_node_cmp(child->name, "ethernet") == 0 || - of_node_cmp(child->name, "nor") == 0 || - of_node_cmp(child->name, "uart") == 0) + else ret = gpmc_probe_generic_child(pdev, child); - - if (WARN(ret < 0, "%s: probing gpmc child %s failed\n", - __func__, child->full_name)) - of_node_put(child); } return 0; -- cgit From e3abe2556b2a689b28926cd1581f0b97e9d2afa4 Mon Sep 17 00:00:00 2001 From: Nicholas Krause Date: Thu, 28 May 2015 11:00:05 -0400 Subject: ARM: OMAP2+: Remove unnessary return statement from the void function, omap2_show_dma_caps This removes the no longer required return statement at the end of the void function, omap2_show_dma_cap due to no need for a return statement due to this function always running successfully. Signed-off-by: Nicholas Krause Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/dma.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/mach-omap2/dma.c b/arch/arm/mach-omap2/dma.c index e1a56d87599e..1ed4be184a29 100644 --- a/arch/arm/mach-omap2/dma.c +++ b/arch/arm/mach-omap2/dma.c @@ -117,7 +117,6 @@ static void omap2_show_dma_caps(void) u8 revision = dma_read(REVISION, 0) & 0xff; printk(KERN_INFO "OMAP DMA hardware revision %d.%d\n", revision >> 4, revision & 0xf); - return; } static unsigned configure_dma_errata(void) -- cgit From 27b0d37e4209f3fc40c65e2b7936ee4154f4070d Mon Sep 17 00:00:00 2001 From: Wei Chen Date: Wed, 20 May 2015 08:08:27 +0000 Subject: ARM: dts: atlas7: add pinctrl and gpio descriptions This patch adds pinctrl and gpio stuff according to the atlas7 pinctrl driver. Signed-off-by: Wei Chen Signed-off-by: Barry Song Acked-by: Linus Walleij --- arch/arm/boot/dts/atlas7.dtsi | 1042 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 1041 insertions(+), 1 deletion(-) diff --git a/arch/arm/boot/dts/atlas7.dtsi b/arch/arm/boot/dts/atlas7.dtsi index a753178abc85..1928f78f5254 100644 --- a/arch/arm/boot/dts/atlas7.dtsi +++ b/arch/arm/boot/dts/atlas7.dtsi @@ -120,6 +120,1025 @@ compatible = "sirf,atlas7-ioc"; reg = <0x18880000 0x1000>, <0x10E40000 0x1000>; + + audio_ac97_pmx: audio_ac97@0 { + audio_ac97 { + groups = "audio_ac97_grp"; + function = "audio_ac97"; + }; + }; + + audio_func_dbg_pmx: audio_func_dbg@0 { + audio_func_dbg { + groups = "audio_func_dbg_grp"; + function = "audio_func_dbg"; + }; + }; + + audio_i2s_pmx: audio_i2s@0 { + audio_i2s { + groups = "audio_i2s_grp"; + function = "audio_i2s"; + }; + }; + + audio_i2s_2ch_pmx: audio_i2s_2ch@0 { + audio_i2s_2ch { + groups = "audio_i2s_2ch_grp"; + function = "audio_i2s_2ch"; + }; + }; + + audio_i2s_extclk_pmx: audio_i2s_extclk@0 { + audio_i2s_extclk { + groups = "audio_i2s_extclk_grp"; + function = "audio_i2s_extclk"; + }; + }; + + audio_uart0_pmx: audio_uart0@0 { + audio_uart0 { + groups = "audio_uart0_grp"; + function = "audio_uart0"; + }; + }; + + audio_uart1_pmx: audio_uart1@0 { + audio_uart1 { + groups = "audio_uart1_grp"; + function = "audio_uart1"; + }; + }; + + audio_uart2_pmx0: audio_uart2@0 { + audio_uart2_0 { + groups = "audio_uart2_grp0"; + function = "audio_uart2_m0"; + }; + }; + + audio_uart2_pmx1: audio_uart2@1 { + audio_uart2_1 { + groups = "audio_uart2_grp1"; + function = "audio_uart2_m1"; + }; + }; + + c_can_trnsvr_pmx: c_can_trnsvr@0 { + c_can_trnsvr { + groups = "c_can_trnsvr_grp"; + function = "c_can_trnsvr"; + }; + }; + + c0_can_pmx0: c0_can@0 { + c0_can_0 { + groups = "c0_can_grp0"; + function = "c0_can_m0"; + }; + }; + + c0_can_pmx1: c0_can@1 { + c0_can_1 { + groups = "c0_can_grp1"; + function = "c0_can_m1"; + }; + }; + + c1_can_pmx0: c1_can@0 { + c1_can_0 { + groups = "c1_can_grp0"; + function = "c1_can_m0"; + }; + }; + + c1_can_pmx1: c1_can@1 { + c1_can_1 { + groups = "c1_can_grp1"; + function = "c1_can_m1"; + }; + }; + + c1_can_pmx2: c1_can@2 { + c1_can_2 { + groups = "c1_can_grp2"; + function = "c1_can_m2"; + }; + }; + + ca_audio_lpc_pmx: ca_audio_lpc@0 { + ca_audio_lpc { + groups = "ca_audio_lpc_grp"; + function = "ca_audio_lpc"; + }; + }; + + ca_bt_lpc_pmx: ca_bt_lpc@0 { + ca_bt_lpc { + groups = "ca_bt_lpc_grp"; + function = "ca_bt_lpc"; + }; + }; + + ca_coex_pmx: ca_coex@0 { + ca_coex { + groups = "ca_coex_grp"; + function = "ca_coex"; + }; + }; + + ca_curator_lpc_pmx: ca_curator_lpc@0 { + ca_curator_lpc { + groups = "ca_curator_lpc_grp"; + function = "ca_curator_lpc"; + }; + }; + + ca_pcm_debug_pmx: ca_pcm_debug@0 { + ca_pcm_debug { + groups = "ca_pcm_debug_grp"; + function = "ca_pcm_debug"; + }; + }; + + ca_pio_pmx: ca_pio@0 { + ca_pio { + groups = "ca_pio_grp"; + function = "ca_pio"; + }; + }; + + ca_sdio_debug_pmx: ca_sdio_debug@0 { + ca_sdio_debug { + groups = "ca_sdio_debug_grp"; + function = "ca_sdio_debug"; + }; + }; + + ca_spi_pmx: ca_spi@0 { + ca_spi { + groups = "ca_spi_grp"; + function = "ca_spi"; + }; + }; + + ca_trb_pmx: ca_trb@0 { + ca_trb { + groups = "ca_trb_grp"; + function = "ca_trb"; + }; + }; + + ca_uart_debug_pmx: ca_uart_debug@0 { + ca_uart_debug { + groups = "ca_uart_debug_grp"; + function = "ca_uart_debug"; + }; + }; + + clkc_pmx0: clkc@0 { + clkc_0 { + groups = "clkc_grp0"; + function = "clkc_m0"; + }; + }; + + clkc_pmx1: clkc@1 { + clkc_1 { + groups = "clkc_grp1"; + function = "clkc_m1"; + }; + }; + + gn_gnss_i2c_pmx: gn_gnss_i2c@0 { + gn_gnss_i2c { + groups = "gn_gnss_i2c_grp"; + function = "gn_gnss_i2c"; + }; + }; + + gn_gnss_uart_nopause_pmx: gn_gnss_uart_nopause@0 { + gn_gnss_uart_nopause { + groups = "gn_gnss_uart_nopause_grp"; + function = "gn_gnss_uart_nopause"; + }; + }; + + gn_gnss_uart_pmx: gn_gnss_uart@0 { + gn_gnss_uart { + groups = "gn_gnss_uart_grp"; + function = "gn_gnss_uart"; + }; + }; + + gn_trg_spi_pmx0: gn_trg_spi@0 { + gn_trg_spi_0 { + groups = "gn_trg_spi_grp0"; + function = "gn_trg_spi_m0"; + }; + }; + + gn_trg_spi_pmx1: gn_trg_spi@1 { + gn_trg_spi_1 { + groups = "gn_trg_spi_grp1"; + function = "gn_trg_spi_m1"; + }; + }; + + cvbs_dbg_pmx: cvbs_dbg@0 { + cvbs_dbg { + groups = "cvbs_dbg_grp"; + function = "cvbs_dbg"; + }; + }; + + cvbs_dbg_test_pmx0: cvbs_dbg_test@0 { + cvbs_dbg_test_0 { + groups = "cvbs_dbg_test_grp0"; + function = "cvbs_dbg_test_m0"; + }; + }; + + cvbs_dbg_test_pmx1: cvbs_dbg_test@1 { + cvbs_dbg_test_1 { + groups = "cvbs_dbg_test_grp1"; + function = "cvbs_dbg_test_m1"; + }; + }; + + cvbs_dbg_test_pmx2: cvbs_dbg_test@2 { + cvbs_dbg_test_2 { + groups = "cvbs_dbg_test_grp2"; + function = "cvbs_dbg_test_m2"; + }; + }; + + cvbs_dbg_test_pmx3: cvbs_dbg_test@3 { + cvbs_dbg_test_3 { + groups = "cvbs_dbg_test_grp3"; + function = "cvbs_dbg_test_m3"; + }; + }; + + cvbs_dbg_test_pmx4: cvbs_dbg_test@4 { + cvbs_dbg_test_4 { + groups = "cvbs_dbg_test_grp4"; + function = "cvbs_dbg_test_m4"; + }; + }; + + cvbs_dbg_test_pmx5: cvbs_dbg_test@5 { + cvbs_dbg_test_5 { + groups = "cvbs_dbg_test_grp5"; + function = "cvbs_dbg_test_m5"; + }; + }; + + cvbs_dbg_test_pmx6: cvbs_dbg_test@6 { + cvbs_dbg_test_6 { + groups = "cvbs_dbg_test_grp6"; + function = "cvbs_dbg_test_m6"; + }; + }; + + cvbs_dbg_test_pmx7: cvbs_dbg_test@7 { + cvbs_dbg_test_7 { + groups = "cvbs_dbg_test_grp7"; + function = "cvbs_dbg_test_m7"; + }; + }; + + cvbs_dbg_test_pmx8: cvbs_dbg_test@8 { + cvbs_dbg_test_8 { + groups = "cvbs_dbg_test_grp8"; + function = "cvbs_dbg_test_m8"; + }; + }; + + cvbs_dbg_test_pmx9: cvbs_dbg_test@9 { + cvbs_dbg_test_9 { + groups = "cvbs_dbg_test_grp9"; + function = "cvbs_dbg_test_m9"; + }; + }; + + cvbs_dbg_test_pmx10: cvbs_dbg_test@10 { + cvbs_dbg_test_10 { + groups = "cvbs_dbg_test_grp10"; + function = "cvbs_dbg_test_m10"; + }; + }; + + cvbs_dbg_test_pmx11: cvbs_dbg_test@11 { + cvbs_dbg_test_11 { + groups = "cvbs_dbg_test_grp11"; + function = "cvbs_dbg_test_m11"; + }; + }; + + cvbs_dbg_test_pmx12: cvbs_dbg_test@12 { + cvbs_dbg_test_12 { + groups = "cvbs_dbg_test_grp12"; + function = "cvbs_dbg_test_m12"; + }; + }; + + cvbs_dbg_test_pmx13: cvbs_dbg_test@13 { + cvbs_dbg_test_13 { + groups = "cvbs_dbg_test_grp13"; + function = "cvbs_dbg_test_m13"; + }; + }; + + cvbs_dbg_test_pmx14: cvbs_dbg_test@14 { + cvbs_dbg_test_14 { + groups = "cvbs_dbg_test_grp14"; + function = "cvbs_dbg_test_m14"; + }; + }; + + cvbs_dbg_test_pmx15: cvbs_dbg_test@15 { + cvbs_dbg_test_15 { + groups = "cvbs_dbg_test_grp15"; + function = "cvbs_dbg_test_m15"; + }; + }; + + gn_gnss_power_pmx: gn_gnss_power@0 { + gn_gnss_power { + groups = "gn_gnss_power_grp"; + function = "gn_gnss_power"; + }; + }; + + gn_gnss_sw_status_pmx: gn_gnss_sw_status@0 { + gn_gnss_sw_status { + groups = "gn_gnss_sw_status_grp"; + function = "gn_gnss_sw_status"; + }; + }; + + gn_gnss_eclk_pmx: gn_gnss_eclk@0 { + gn_gnss_eclk { + groups = "gn_gnss_eclk_grp"; + function = "gn_gnss_eclk"; + }; + }; + + gn_gnss_irq1_pmx0: gn_gnss_irq1@0 { + gn_gnss_irq1_0 { + groups = "gn_gnss_irq1_grp0"; + function = "gn_gnss_irq1_m0"; + }; + }; + + gn_gnss_irq2_pmx0: gn_gnss_irq2@0 { + gn_gnss_irq2_0 { + groups = "gn_gnss_irq2_grp0"; + function = "gn_gnss_irq2_m0"; + }; + }; + + gn_gnss_tm_pmx: gn_gnss_tm@0 { + gn_gnss_tm { + groups = "gn_gnss_tm_grp"; + function = "gn_gnss_tm"; + }; + }; + + gn_gnss_tsync_pmx: gn_gnss_tsync@0 { + gn_gnss_tsync { + groups = "gn_gnss_tsync_grp"; + function = "gn_gnss_tsync"; + }; + }; + + gn_io_gnsssys_sw_cfg_pmx: gn_io_gnsssys_sw_cfg@0 { + gn_io_gnsssys_sw_cfg { + groups = "gn_io_gnsssys_sw_cfg_grp"; + function = "gn_io_gnsssys_sw_cfg"; + }; + }; + + gn_trg_pmx0: gn_trg@0 { + gn_trg_0 { + groups = "gn_trg_grp0"; + function = "gn_trg_m0"; + }; + }; + + gn_trg_pmx1: gn_trg@1 { + gn_trg_1 { + groups = "gn_trg_grp1"; + function = "gn_trg_m1"; + }; + }; + + gn_trg_shutdown_pmx0: gn_trg_shutdown@0 { + gn_trg_shutdown_0 { + groups = "gn_trg_shutdown_grp0"; + function = "gn_trg_shutdown_m0"; + }; + }; + + gn_trg_shutdown_pmx1: gn_trg_shutdown@1 { + gn_trg_shutdown_1 { + groups = "gn_trg_shutdown_grp1"; + function = "gn_trg_shutdown_m1"; + }; + }; + + gn_trg_shutdown_pmx2: gn_trg_shutdown@2 { + gn_trg_shutdown_2 { + groups = "gn_trg_shutdown_grp2"; + function = "gn_trg_shutdown_m2"; + }; + }; + + gn_trg_shutdown_pmx3: gn_trg_shutdown@3 { + gn_trg_shutdown_3 { + groups = "gn_trg_shutdown_grp3"; + function = "gn_trg_shutdown_m3"; + }; + }; + + i2c0_pmx: i2c0@0 { + i2c0 { + groups = "i2c0_grp"; + function = "i2c0"; + }; + }; + + i2c1_pmx: i2c1@0 { + i2c1 { + groups = "i2c1_grp"; + function = "i2c1"; + }; + }; + + jtag_pmx0: jtag@0 { + jtag_0 { + groups = "jtag_grp0"; + function = "jtag_m0"; + }; + }; + + ks_kas_spi_pmx0: ks_kas_spi@0 { + ks_kas_spi_0 { + groups = "ks_kas_spi_grp0"; + function = "ks_kas_spi_m0"; + }; + }; + + ld_ldd_pmx: ld_ldd@0 { + ld_ldd { + groups = "ld_ldd_grp"; + function = "ld_ldd"; + }; + }; + + ld_ldd_16bit_pmx: ld_ldd_16bit@0 { + ld_ldd_16bit { + groups = "ld_ldd_16bit_grp"; + function = "ld_ldd_16bit"; + }; + }; + + ld_ldd_fck_pmx: ld_ldd_fck@0 { + ld_ldd_fck { + groups = "ld_ldd_fck_grp"; + function = "ld_ldd_fck"; + }; + }; + + ld_ldd_lck_pmx: ld_ldd_lck@0 { + ld_ldd_lck { + groups = "ld_ldd_lck_grp"; + function = "ld_ldd_lck"; + }; + }; + + lr_lcdrom_pmx: lr_lcdrom@0 { + lr_lcdrom { + groups = "lr_lcdrom_grp"; + function = "lr_lcdrom"; + }; + }; + + lvds_analog_pmx: lvds_analog@0 { + lvds_analog { + groups = "lvds_analog_grp"; + function = "lvds_analog"; + }; + }; + + nd_df_pmx: nd_df@0 { + nd_df { + groups = "nd_df_grp"; + function = "nd_df"; + }; + }; + + nd_df_nowp_pmx: nd_df_nowp@0 { + nd_df_nowp { + groups = "nd_df_nowp_grp"; + function = "nd_df_nowp"; + }; + }; + + ps_pmx: ps@0 { + ps { + groups = "ps_grp"; + function = "ps"; + }; + }; + + pwc_core_on_pmx: pwc_core_on@0 { + pwc_core_on { + groups = "pwc_core_on_grp"; + function = "pwc_core_on"; + }; + }; + + pwc_ext_on_pmx: pwc_ext_on@0 { + pwc_ext_on { + groups = "pwc_ext_on_grp"; + function = "pwc_ext_on"; + }; + }; + + pwc_gpio3_clk_pmx: pwc_gpio3_clk@0 { + pwc_gpio3_clk { + groups = "pwc_gpio3_clk_grp"; + function = "pwc_gpio3_clk"; + }; + }; + + pwc_io_on_pmx: pwc_io_on@0 { + pwc_io_on { + groups = "pwc_io_on_grp"; + function = "pwc_io_on"; + }; + }; + + pwc_lowbatt_b_pmx0: pwc_lowbatt_b@0 { + pwc_lowbatt_b_0 { + groups = "pwc_lowbatt_b_grp0"; + function = "pwc_lowbatt_b_m0"; + }; + }; + + pwc_mem_on_pmx: pwc_mem_on@0 { + pwc_mem_on { + groups = "pwc_mem_on_grp"; + function = "pwc_mem_on"; + }; + }; + + pwc_on_key_b_pmx0: pwc_on_key_b@0 { + pwc_on_key_b_0 { + groups = "pwc_on_key_b_grp0"; + function = "pwc_on_key_b_m0"; + }; + }; + + pwc_wakeup_src0_pmx: pwc_wakeup_src0@0 { + pwc_wakeup_src0 { + groups = "pwc_wakeup_src0_grp"; + function = "pwc_wakeup_src0"; + }; + }; + + pwc_wakeup_src1_pmx: pwc_wakeup_src1@0 { + pwc_wakeup_src1 { + groups = "pwc_wakeup_src1_grp"; + function = "pwc_wakeup_src1"; + }; + }; + + pwc_wakeup_src2_pmx: pwc_wakeup_src2@0 { + pwc_wakeup_src2 { + groups = "pwc_wakeup_src2_grp"; + function = "pwc_wakeup_src2"; + }; + }; + + pwc_wakeup_src3_pmx: pwc_wakeup_src3@0 { + pwc_wakeup_src3 { + groups = "pwc_wakeup_src3_grp"; + function = "pwc_wakeup_src3"; + }; + }; + + pw_cko0_pmx0: pw_cko0@0 { + pw_cko0_0 { + groups = "pw_cko0_grp0"; + function = "pw_cko0_m0"; + }; + }; + + pw_cko0_pmx1: pw_cko0@1 { + pw_cko0_1 { + groups = "pw_cko0_grp1"; + function = "pw_cko0_m1"; + }; + }; + + pw_cko0_pmx2: pw_cko0@2 { + pw_cko0_2 { + groups = "pw_cko0_grp2"; + function = "pw_cko0_m2"; + }; + }; + + pw_cko1_pmx0: pw_cko1@0 { + pw_cko1_0 { + groups = "pw_cko1_grp0"; + function = "pw_cko1_m0"; + }; + }; + + pw_cko1_pmx1: pw_cko1@1 { + pw_cko1_1 { + groups = "pw_cko1_grp1"; + function = "pw_cko1_m1"; + }; + }; + + pw_i2s01_clk_pmx0: pw_i2s01_clk@0 { + pw_i2s01_clk_0 { + groups = "pw_i2s01_clk_grp0"; + function = "pw_i2s01_clk_m0"; + }; + }; + + pw_i2s01_clk_pmx1: pw_i2s01_clk@1 { + pw_i2s01_clk_1 { + groups = "pw_i2s01_clk_grp1"; + function = "pw_i2s01_clk_m1"; + }; + }; + + pw_pwm0_pmx: pw_pwm0@0 { + pw_pwm0 { + groups = "pw_pwm0_grp"; + function = "pw_pwm0"; + }; + }; + + pw_pwm1_pmx: pw_pwm1@0 { + pw_pwm1 { + groups = "pw_pwm1_grp"; + function = "pw_pwm1"; + }; + }; + + pw_pwm2_pmx0: pw_pwm2@0 { + pw_pwm2_0 { + groups = "pw_pwm2_grp0"; + function = "pw_pwm2_m0"; + }; + }; + + pw_pwm2_pmx1: pw_pwm2@1 { + pw_pwm2_1 { + groups = "pw_pwm2_grp1"; + function = "pw_pwm2_m1"; + }; + }; + + pw_pwm3_pmx0: pw_pwm3@0 { + pw_pwm3_0 { + groups = "pw_pwm3_grp0"; + function = "pw_pwm3_m0"; + }; + }; + + pw_pwm3_pmx1: pw_pwm3@1 { + pw_pwm3_1 { + groups = "pw_pwm3_grp1"; + function = "pw_pwm3_m1"; + }; + }; + + pw_pwm_cpu_vol_pmx0: pw_pwm_cpu_vol@0 { + pw_pwm_cpu_vol_0 { + groups = "pw_pwm_cpu_vol_grp0"; + function = "pw_pwm_cpu_vol_m0"; + }; + }; + + pw_pwm_cpu_vol_pmx1: pw_pwm_cpu_vol@1 { + pw_pwm_cpu_vol_1 { + groups = "pw_pwm_cpu_vol_grp1"; + function = "pw_pwm_cpu_vol_m1"; + }; + }; + + pw_backlight_pmx0: pw_backlight@0 { + pw_backlight_0 { + groups = "pw_backlight_grp0"; + function = "pw_backlight_m0"; + }; + }; + + pw_backlight_pmx1: pw_backlight@1 { + pw_backlight_1 { + groups = "pw_backlight_grp1"; + function = "pw_backlight_m1"; + }; + }; + + rg_eth_mac_pmx: rg_eth_mac@0 { + rg_eth_mac { + groups = "rg_eth_mac_grp"; + function = "rg_eth_mac"; + }; + }; + + rg_gmac_phy_intr_n_pmx: rg_gmac_phy_intr_n@0 { + rg_gmac_phy_intr_n { + groups = "rg_gmac_phy_intr_n_grp"; + function = "rg_gmac_phy_intr_n"; + }; + }; + + rg_rgmii_mac_pmx: rg_rgmii_mac@0 { + rg_rgmii_mac { + groups = "rg_rgmii_mac_grp"; + function = "rg_rgmii_mac"; + }; + }; + + rg_rgmii_phy_ref_clk_pmx0: rg_rgmii_phy_ref_clk@0 { + rg_rgmii_phy_ref_clk_0 { + groups = + "rg_rgmii_phy_ref_clk_grp0"; + function = + "rg_rgmii_phy_ref_clk_m0"; + }; + }; + + rg_rgmii_phy_ref_clk_pmx1: rg_rgmii_phy_ref_clk@1 { + rg_rgmii_phy_ref_clk_1 { + groups = + "rg_rgmii_phy_ref_clk_grp1"; + function = + "rg_rgmii_phy_ref_clk_m1"; + }; + }; + + sd0_pmx: sd0@0 { + sd0 { + groups = "sd0_grp"; + function = "sd0"; + }; + }; + + sd0_4bit_pmx: sd0_4bit@0 { + sd0_4bit { + groups = "sd0_4bit_grp"; + function = "sd0_4bit"; + }; + }; + + sd1_pmx: sd1@0 { + sd1 { + groups = "sd1_grp"; + function = "sd1"; + }; + }; + + sd1_4bit_pmx0: sd1_4bit@0 { + sd1_4bit_0 { + groups = "sd1_4bit_grp0"; + function = "sd1_4bit_m0"; + }; + }; + + sd1_4bit_pmx1: sd1_4bit@1 { + sd1_4bit_1 { + groups = "sd1_4bit_grp1"; + function = "sd1_4bit_m1"; + }; + }; + + sd2_pmx0: sd2@0 { + sd2_0 { + groups = "sd2_grp0"; + function = "sd2_m0"; + }; + }; + + sd2_no_cdb_pmx0: sd2_no_cdb@0 { + sd2_no_cdb_0 { + groups = "sd2_no_cdb_grp0"; + function = "sd2_no_cdb_m0"; + }; + }; + + sd3_pmx: sd3@0 { + sd3 { + groups = "sd3_grp"; + function = "sd3"; + }; + }; + + sd5_pmx: sd5@0 { + sd5 { + groups = "sd5_grp"; + function = "sd5"; + }; + }; + + sd6_pmx0: sd6@0 { + sd6_0 { + groups = "sd6_grp0"; + function = "sd6_m0"; + }; + }; + + sd6_pmx1: sd6@1 { + sd6_1 { + groups = "sd6_grp1"; + function = "sd6_m1"; + }; + }; + + sp0_ext_ldo_on_pmx: sp0_ext_ldo_on@0 { + sp0_ext_ldo_on { + groups = "sp0_ext_ldo_on_grp"; + function = "sp0_ext_ldo_on"; + }; + }; + + sp0_qspi_pmx: sp0_qspi@0 { + sp0_qspi { + groups = "sp0_qspi_grp"; + function = "sp0_qspi"; + }; + }; + + sp1_spi_pmx: sp1_spi@0 { + sp1_spi { + groups = "sp1_spi_grp"; + function = "sp1_spi"; + }; + }; + + tpiu_trace_pmx: tpiu_trace@0 { + tpiu_trace { + groups = "tpiu_trace_grp"; + function = "tpiu_trace"; + }; + }; + + uart0_pmx: uart0@0 { + uart0 { + groups = "uart0_grp"; + function = "uart0"; + }; + }; + + uart0_nopause_pmx: uart0_nopause@0 { + uart0_nopause { + groups = "uart0_nopause_grp"; + function = "uart0_nopause"; + }; + }; + + uart1_pmx: uart1@0 { + uart1 { + groups = "uart1_grp"; + function = "uart1"; + }; + }; + + uart2_pmx: uart2@0 { + uart2 { + groups = "uart2_grp"; + function = "uart2"; + }; + }; + + uart3_pmx0: uart3@0 { + uart3_0 { + groups = "uart3_grp0"; + function = "uart3_m0"; + }; + }; + + uart3_pmx1: uart3@1 { + uart3_1 { + groups = "uart3_grp1"; + function = "uart3_m1"; + }; + }; + + uart3_pmx2: uart3@2 { + uart3_2 { + groups = "uart3_grp2"; + function = "uart3_m2"; + }; + }; + + uart3_pmx3: uart3@3 { + uart3_3 { + groups = "uart3_grp3"; + function = "uart3_m3"; + }; + }; + + uart3_nopause_pmx0: uart3_nopause@0 { + uart3_nopause_0 { + groups = "uart3_nopause_grp0"; + function = "uart3_nopause_m0"; + }; + }; + + uart3_nopause_pmx1: uart3_nopause@1 { + uart3_nopause_1 { + groups = "uart3_nopause_grp1"; + function = "uart3_nopause_m1"; + }; + }; + + uart4_pmx0: uart4@0 { + uart4_0 { + groups = "uart4_grp0"; + function = "uart4_m0"; + }; + }; + + uart4_pmx1: uart4@1 { + uart4_1 { + groups = "uart4_grp1"; + function = "uart4_m1"; + }; + }; + + uart4_pmx2: uart4@2 { + uart4_2 { + groups = "uart4_grp2"; + function = "uart4_m2"; + }; + }; + + uart4_nopause_pmx: uart4_nopause@0 { + uart4_nopause { + groups = "uart4_nopause_grp"; + function = "uart4_nopause"; + }; + }; + + usb0_drvvbus_pmx: usb0_drvvbus@0 { + usb0_drvvbus { + groups = "usb0_drvvbus_grp"; + function = "usb0_drvvbus"; + }; + }; + + usb1_drvvbus_pmx: usb1_drvvbus@0 { + usb1_drvvbus { + groups = "usb1_drvvbus_grp"; + function = "usb1_drvvbus"; + }; + }; + + visbus_dout_pmx: visbus_dout@0 { + visbus_dout { + groups = "visbus_dout_grp"; + function = "visbus_dout"; + }; + }; + + vi_vip1_pmx: vi_vip1@0 { + vi_vip1 { + groups = "vi_vip1_grp"; + function = "vi_vip1"; + }; + }; + + vi_vip1_ext_pmx: vi_vip1_ext@0 { + vi_vip1_ext { + groups = "vi_vip1_ext_grp"; + function = "vi_vip1_ext"; + }; + }; + + vi_vip1_low8bit_pmx: vi_vip1_low8bit@0 { + vi_vip1_low8bit { + groups = "vi_vip1_low8bit_grp"; + function = "vi_vip1_low8bit"; + }; + }; + + vi_vip1_high8bit_pmx: vi_vip1_high8bit@0 { + vi_vip1_high8bit { + groups = "vi_vip1_high8bit_grp"; + function = "vi_vip1_high8bit"; + }; + }; }; pmipc { @@ -341,6 +1360,12 @@ clock-names = "gpio0_io"; gpio-controller; interrupt-controller; + + gpio-banks = <2>; + gpio-ranges = <&pinctrl 0 0 0>, + <&pinctrl 32 0 0>; + gpio-ranges-group-names = "lvds_gpio_grp", + "uart_nand_gpio_grp"; }; nand@17050000 { @@ -446,11 +1471,22 @@ #interrupt-cells = <2>; compatible = "sirf,atlas7-gpio"; reg = <0x13300000 0x1000>; - interrupts = <0 43 0>, <0 44 0>, <0 45 0>; + interrupts = <0 43 0>, <0 44 0>, + <0 45 0>, <0 46 0>; clocks = <&car 84>; clock-names = "gpio1_io"; gpio-controller; interrupt-controller; + + gpio-banks = <4>; + gpio-ranges = <&pinctrl 0 0 0>, + <&pinctrl 32 0 0>, + <&pinctrl 64 0 0>, + <&pinctrl 96 0 0>; + gpio-ranges-group-names = "gnss_gpio_grp", + "lcd_vip_gpio_grp", + "sdio_i2s_gpio_grp", + "sp_rgmii_gpio_grp"; }; sd2: sdhci@14200000 { @@ -729,6 +1765,10 @@ interrupts = <0 47 0>; gpio-controller; interrupt-controller; + + gpio-banks = <1>; + gpio-ranges = <&pinctrl 0 0 0>; + gpio-ranges-group-names = "rtc_gpio_grp"; }; rtc-iobg@18840000 { -- cgit From b1999477ed91c3c33891acfe0e18a4457e5c4915 Mon Sep 17 00:00:00 2001 From: Guo Zeng Date: Tue, 14 Apr 2015 11:55:55 +0000 Subject: ARM: prima2: move to use REGMAP APIs for rtciobrg all devices behind rtciobrg needs a special way to access. currently they are using a platform-specific API. this patch moves to REGMAP, then clients can use regmap APIs to read/write. for the moment, old APIs are still kept, once all clients move to regmap, old APIs will be dropped. this patch also does minor clean for comments, authors statement. Signed-off-by: Guo Zeng Signed-off-by: Barry Song --- arch/arm/mach-prima2/Kconfig | 1 + arch/arm/mach-prima2/rtciobrg.c | 48 +++++++++++++++++++++++++++++++++--- include/linux/rtc/sirfsoc_rtciobrg.h | 4 +++ 3 files changed, 50 insertions(+), 3 deletions(-) diff --git a/arch/arm/mach-prima2/Kconfig b/arch/arm/mach-prima2/Kconfig index e03d8b5c9ad0..9ab8932403e5 100644 --- a/arch/arm/mach-prima2/Kconfig +++ b/arch/arm/mach-prima2/Kconfig @@ -4,6 +4,7 @@ menuconfig ARCH_SIRF select ARCH_REQUIRE_GPIOLIB select GENERIC_IRQ_CHIP select NO_IOPORT_MAP + select REGMAP select PINCTRL select PINCTRL_SIRF help diff --git a/arch/arm/mach-prima2/rtciobrg.c b/arch/arm/mach-prima2/rtciobrg.c index 8f66d8f7ca75..d4852d24dc7d 100644 --- a/arch/arm/mach-prima2/rtciobrg.c +++ b/arch/arm/mach-prima2/rtciobrg.c @@ -1,5 +1,5 @@ /* - * RTC I/O Bridge interfaces for CSR SiRFprimaII + * RTC I/O Bridge interfaces for CSR SiRFprimaII/atlas7 * ARM access the registers of SYSRTC, GPSRTC and PWRC through this module * * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company. @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -66,6 +67,7 @@ u32 sirfsoc_rtc_iobrg_readl(u32 addr) { unsigned long flags, val; + /* TODO: add hwspinlock to sync with M3 */ spin_lock_irqsave(&rtciobrg_lock, flags); val = __sirfsoc_rtc_iobrg_readl(addr); @@ -90,6 +92,7 @@ void sirfsoc_rtc_iobrg_writel(u32 val, u32 addr) { unsigned long flags; + /* TODO: add hwspinlock to sync with M3 */ spin_lock_irqsave(&rtciobrg_lock, flags); sirfsoc_rtc_iobrg_pre_writel(val, addr); @@ -102,6 +105,45 @@ void sirfsoc_rtc_iobrg_writel(u32 val, u32 addr) } EXPORT_SYMBOL_GPL(sirfsoc_rtc_iobrg_writel); + +static int regmap_iobg_regwrite(void *context, unsigned int reg, + unsigned int val) +{ + sirfsoc_rtc_iobrg_writel(val, reg); + return 0; +} + +static int regmap_iobg_regread(void *context, unsigned int reg, + unsigned int *val) +{ + *val = (u32)sirfsoc_rtc_iobrg_readl(reg); + return 0; +} + +static struct regmap_bus regmap_iobg = { + .reg_write = regmap_iobg_regwrite, + .reg_read = regmap_iobg_regread, +}; + +/** + * devm_regmap_init_iobg(): Initialise managed register map + * + * @iobg: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The regmap will be automatically freed by the + * device management code. + */ +struct regmap *devm_regmap_init_iobg(struct device *dev, + const struct regmap_config *config) +{ + const struct regmap_bus *bus = ®map_iobg; + + return devm_regmap_init(dev, bus, dev, config); +} +EXPORT_SYMBOL_GPL(devm_regmap_init_iobg); + static const struct of_device_id rtciobrg_ids[] = { { .compatible = "sirf,prima2-rtciobg" }, {} @@ -132,7 +174,7 @@ static int __init sirfsoc_rtciobrg_init(void) } postcore_initcall(sirfsoc_rtciobrg_init); -MODULE_AUTHOR("Zhiwu Song , " - "Barry Song "); +MODULE_AUTHOR("Zhiwu Song "); +MODULE_AUTHOR("Barry Song "); MODULE_DESCRIPTION("CSR SiRFprimaII rtc io bridge"); MODULE_LICENSE("GPL v2"); diff --git a/include/linux/rtc/sirfsoc_rtciobrg.h b/include/linux/rtc/sirfsoc_rtciobrg.h index 2c92e1c8e055..aefd997262e4 100644 --- a/include/linux/rtc/sirfsoc_rtciobrg.h +++ b/include/linux/rtc/sirfsoc_rtciobrg.h @@ -9,10 +9,14 @@ #ifndef _SIRFSOC_RTC_IOBRG_H_ #define _SIRFSOC_RTC_IOBRG_H_ +struct regmap_config; + extern void sirfsoc_rtc_iobrg_besyncing(void); extern u32 sirfsoc_rtc_iobrg_readl(u32 addr); extern void sirfsoc_rtc_iobrg_writel(u32 val, u32 addr); +struct regmap *devm_regmap_init_iobg(struct device *dev, + const struct regmap_config *config); #endif -- cgit From a903e3b64adfc2e126237f06e0b3ea7d2d8d13b5 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 17 Mar 2015 15:31:11 +0200 Subject: drm/omap: return error if dma_alloc_writecombine fails On a platform with no TILER (e.g. omap3, am43xx), when the user wants to allocate buffer with flag OMAP_BO_SCANOUT, the buffer needs to be allocated with dma_alloc_writecombine. For some reason the driver does not return an error if that alloc fails, instead it continues without backing memory. This leads to errors later when the user tries to use the buffer. This patch makes the driver return an error if dma_alloc_writecombine fails. Signed-off-by: Tomi Valkeinen Acked-by: Laurent Pinchart --- drivers/gpu/drm/omapdrm/omap_gem.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index 2ab77801cf5f..874eb6dcf94b 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -1378,11 +1378,7 @@ struct drm_gem_object *omap_gem_new(struct drm_device *dev, omap_obj = kzalloc(sizeof(*omap_obj), GFP_KERNEL); if (!omap_obj) - goto fail; - - spin_lock(&priv->list_lock); - list_add(&omap_obj->mm_list, &priv->obj_list); - spin_unlock(&priv->list_lock); + return NULL; obj = &omap_obj->base; @@ -1392,11 +1388,19 @@ struct drm_gem_object *omap_gem_new(struct drm_device *dev, */ omap_obj->vaddr = dma_alloc_writecombine(dev->dev, size, &omap_obj->paddr, GFP_KERNEL); - if (omap_obj->vaddr) - flags |= OMAP_BO_DMA; + if (!omap_obj->vaddr) { + kfree(omap_obj); + return NULL; + } + + flags |= OMAP_BO_DMA; } + spin_lock(&priv->list_lock); + list_add(&omap_obj->mm_list, &priv->obj_list); + spin_unlock(&priv->list_lock); + omap_obj->flags = flags; if (flags & OMAP_BO_TILED) { -- cgit From 398f74569cebbf06bc6b069442bcd0e9616ca465 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 19 Jun 2015 21:37:56 +0100 Subject: ARM: 8393/1: smp: Fix suspicious RCU usage with ipi tracepoints John Stultz reports an RCU splat on boot with ARM ipi trace events enabled. =============================== [ INFO: suspicious RCU usage. ] 4.1.0-rc7-00033-gb5bed2f #153 Not tainted ------------------------------- include/trace/events/ipi.h:68 suspicious rcu_dereference_check() usage! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 1, debug_locks = 0 RCU used illegally from extended quiescent state! no locks held by swapper/0/0. stack backtrace: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.1.0-rc7-00033-gb5bed2f #153 Hardware name: Qualcomm (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x70/0xbc) [] (dump_stack) from [] (handle_IPI+0x428/0x604) [] (handle_IPI) from [] (gic_handle_irq+0x54/0x5c) [] (gic_handle_irq) from [] (__irq_svc+0x44/0x7c) Exception stack(0xc09f3f48 to 0xc09f3f90) 3f40: 00000001 00000001 00000000 c09f73b8 c09f4528 c0a5de9c 3f60: c076b4f0 00000000 00000000 c09ef108 c0a5cec1 00000001 00000000 c09f3f90 3f80: c026bf60 c0210ab8 20000113 ffffffff [] (__irq_svc) from [] (arch_cpu_idle+0x20/0x3c) [] (arch_cpu_idle) from [] (cpu_startup_entry+0x2c0/0x5dc) [] (cpu_startup_entry) from [] (start_kernel+0x358/0x3c4) [] (start_kernel) from [<8020807c>] (0x8020807c) At this point in the IPI handling path we haven't called irq_enter() yet, so RCU doesn't know that we're about to exit idle and properly warns that we're using RCU from an idle CPU. Use trace_ipi_entry_rcuidle() instead of trace_ipi_entry() so that RCU is informed about our exit from idle. Fixes: 365ec7b17327 ("ARM: add IPI tracepoints") Reported-by: John Stultz Tested-by: John Stultz Acked-by: Steven Rostedt Reviewed-by: Paul E. McKenney Signed-off-by: Stephen Boyd Signed-off-by: Russell King --- arch/arm/kernel/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index cca5b8758185..f11d82527076 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -576,7 +576,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) struct pt_regs *old_regs = set_irq_regs(regs); if ((unsigned)ipinr < NR_IPI) { - trace_ipi_entry(ipi_types[ipinr]); + trace_ipi_entry_rcuidle(ipi_types[ipinr]); __inc_irq_stat(cpu, ipi_irqs[ipinr]); } @@ -635,7 +635,7 @@ void handle_IPI(int ipinr, struct pt_regs *regs) } if ((unsigned)ipinr < NR_IPI) - trace_ipi_exit(ipi_types[ipinr]); + trace_ipi_exit_rcuidle(ipi_types[ipinr]); set_irq_regs(old_regs); } -- cgit From 3de1f52a3ae823265299409e276f56cdadddc310 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Thu, 25 Jun 2015 01:04:20 +0100 Subject: ARM: 8394/1: update memblock limit after mapping lowmem The memblock limit is currently used in find_limits to find the bounds for ZONE_NORMAL. The memblock limit may need to be rounded down a PMD size to ensure allocations are fully mapped though. This has the side effect of reducing the amount of memory in ZONE_NORMAL. Once all lowmem is mapped, it's safe to change the memblock limit back to include the unaligned section. Adjust the memblock limit after lowmem mapping is complete. Before: # cat /proc/zoneinfo | grep managed managed 62907 managed 424 After: # cat /proc/zoneinfo | grep managed managed 63331 Signed-off-by: Laura Abbott Reviewed-by: Mark Rutland Tested-by: Mark Rutland Signed-off-by: Russell King --- arch/arm/mm/mmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 7186382672b5..904d1532e6d0 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1519,6 +1519,7 @@ void __init paging_init(const struct machine_desc *mdesc) build_mem_type_table(); prepare_page_table(); map_lowmem(); + memblock_set_current_limit(arm_lowmem_limit); dma_contiguous_remap(); devicemaps_init(mdesc); kmap_init(); -- cgit From e48866647b486f31ff7c3927b48de8bbb1c6a4c0 Mon Sep 17 00:00:00 2001 From: Vitaly Andrianov Date: Fri, 26 Jun 2015 17:13:03 +0100 Subject: ARM: 8396/1: use phys_addr_t in pfn_to_kaddr() This patch fixes pfn_to_kaddr() to use phys_addr_t. Without this, this macro is broken on LPAE systems. For physical addresses above first 4GB result of shifting pfn with PAGE_SHIFT may be truncated. Signed-off-by: Vitaly Andrianov Acked-by: Nicolas Pitre Acked-by: Santosh Shilimkar Signed-off-by: Russell King --- arch/arm/include/asm/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 184def0e1652..063ef314cca9 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -291,7 +291,7 @@ static inline void *phys_to_virt(phys_addr_t x) */ #define __pa(x) __virt_to_phys((unsigned long)(x)) #define __va(x) ((void *)__phys_to_virt((phys_addr_t)(x))) -#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) +#define pfn_to_kaddr(pfn) __va((phys_addr_t)(pfn) << PAGE_SHIFT) extern phys_addr_t (*arch_virt_to_idmap)(unsigned long x); -- cgit From e6ae32c343981a50aa07c34767f2a967cc920edc Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 25 Jun 2015 10:34:38 +0100 Subject: ARM: fix DEBUG_SET_MODULE_RONX build dependencies randconfig testing reveals that DEBUG_SET_MODULE_RONX needs to depend on MMU otherwise these build errors are observed: kernel/built-in.o: In function `set_section_ro_nx': kernel/module.c:1738: undefined reference to `set_memory_nx' kernel/built-in.o: In function `set_page_attributes': kernel/module.c:1709: undefined reference to `set_memory_ro' This is because the pageattr functions are not built for !MMU configs as they don't have page tables. Signed-off-by: Russell King --- arch/arm/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 0c12ffb155a2..dfa21cb58f36 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -1590,7 +1590,7 @@ config PID_IN_CONTEXTIDR config DEBUG_SET_MODULE_RONX bool "Set loadable kernel module data as NX and text as RO" - depends on MODULES + depends on MODULES && MMU ---help--- This option helps catch unintended modifications to loadable kernel module's text and read-only data. It also prevents execution -- cgit From b4d103d1a45fed0da2f31c905eb5e053c84a41c6 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 25 Jun 2015 10:49:45 +0100 Subject: ARM: add help text for HIGHPTE configuration entry Add some help text for the HIGHPTE configuration entry. This comes from the x86 entry, but reworded to be more a more accurate description of what this option does. Signed-off-by: Russell King --- arch/arm/Kconfig | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 45df48ba0b12..cc0ea0c6cfc9 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1661,6 +1661,12 @@ config HIGHMEM config HIGHPTE bool "Allocate 2nd-level pagetables from highmem" depends on HIGHMEM + help + The VM uses one page of physical memory for each page table. + For systems with a lot of processes, this can use a lot of + precious low memory, eventually leading to low memory being + consumed by page tables. Setting this option will allow + user-space 2nd level page tables to reside in high memory. config HW_PERF_EVENTS bool "Enable hardware performance counter support for perf events" -- cgit From eeb3fee8f6cca5e7bf1647d9e327c7b40e384578 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 25 Jun 2015 10:52:58 +0100 Subject: ARM: add helpful message when truncating physical memory Add a nmessage to suggest that HIGHMEM is enabled when physical memory is truncated due to lack of virtual address space to map it in the low memory mapping. Signed-off-by: Russell King --- arch/arm/mm/mmu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 904d1532e6d0..79de062c6077 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1072,6 +1072,7 @@ void __init sanity_check_meminfo(void) int highmem = 0; phys_addr_t vmalloc_limit = __pa(vmalloc_min - 1) + 1; struct memblock_region *reg; + bool should_use_highmem = false; for_each_memblock(memory, reg) { phys_addr_t block_start = reg->base; @@ -1090,6 +1091,7 @@ void __init sanity_check_meminfo(void) pr_notice("Ignoring RAM at %pa-%pa (!CONFIG_HIGHMEM)\n", &block_start, &block_end); memblock_remove(reg->base, reg->size); + should_use_highmem = true; continue; } @@ -1100,6 +1102,7 @@ void __init sanity_check_meminfo(void) &block_start, &block_end, &vmalloc_limit); memblock_remove(vmalloc_limit, overlap_size); block_end = vmalloc_limit; + should_use_highmem = true; } } @@ -1134,6 +1137,9 @@ void __init sanity_check_meminfo(void) } } + if (should_use_highmem) + pr_notice("Consider using a HIGHMEM enabled kernel.\n"); + high_memory = __va(arm_lowmem_limit - 1) + 1; /* -- cgit From 31f02455455d405320e2f749696bef4e02903b35 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 30 Jun 2015 12:07:17 -0400 Subject: sparse: fix misplaced __pmem definition Move the definition of __pmem outside of CONFIG_SPARSE_RCU_POINTER to fix: drivers/nvdimm/pmem.c:198:17: sparse: too many arguments for function __builtin_expect drivers/nvdimm/pmem.c:36:33: sparse: expected ; at end of declaration drivers/nvdimm/pmem.c:48:21: sparse: void declaration ...due to __pmem failing to be defined in some configurations when CONFIG_SPARSE_RCU_POINTER=y. Reported-by: kbuild test robot Reported-by: Dan Carpenter Signed-off-by: Dan Williams --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 26fc8bc77f85..d8fbd500330e 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -17,11 +17,11 @@ # define __release(x) __context__(x,-1) # define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) # define __percpu __attribute__((noderef, address_space(3))) +# define __pmem __attribute__((noderef, address_space(5))) #ifdef CONFIG_SPARSE_RCU_POINTER # define __rcu __attribute__((noderef, address_space(4))) #else # define __rcu -# define __pmem __attribute__((noderef, address_space(5))) #endif extern void __chk_user_ptr(const volatile void __user *); extern void __chk_io_ptr(const volatile void __iomem *); -- cgit From af834d457d9ed69e14836b63d0da198fdd2ec706 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 30 Jun 2015 14:10:09 -0400 Subject: libnvdimm: smatch cleanups in __nd_ioctl Drop use of access_ok() since we are already using copy_{to|from}_user() which do their own access_ok(). Reported-by: Dan Carpenter Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 8eb22c0ca7ce..73442bd824a7 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -535,8 +535,6 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, __func__, dimm_name, cmd_name, i); return -ENXIO; } - if (!access_ok(VERIFY_READ, p + in_len, in_size)) - return -EFAULT; if (in_len < sizeof(in_env)) copy = min_t(u32, sizeof(in_env) - in_len, in_size); else @@ -557,8 +555,6 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, __func__, dimm_name, cmd_name, i); return -EFAULT; } - if (!access_ok(VERIFY_WRITE, p + in_len + out_len, out_size)) - return -EFAULT; if (out_len < sizeof(out_env)) copy = min_t(u32, sizeof(out_env) - out_len, out_size); else @@ -570,9 +566,6 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, } buf_len = out_len + in_len; - if (!access_ok(VERIFY_WRITE, p, sizeof(buf_len))) - return -EFAULT; - if (buf_len > ND_IOCTL_MAX_BUFLEN) { dev_dbg(dev, "%s:%s cmd: %s buf_len: %zu > %d\n", __func__, dimm_name, cmd_name, buf_len, -- cgit From daa1dee405d7d3d3e816b84a692e838a5647a02a Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Sun, 28 Jun 2015 17:00:57 +0800 Subject: nvdimm: Fix return value of nvdimm_bus_init() if class_create() fails Return proper error if class_create() fails. Signed-off-by: Axel Lin Signed-off-by: Dan Williams --- drivers/nvdimm/bus.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 73442bd824a7..7e2c43f701bc 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -699,8 +699,10 @@ int __init nvdimm_bus_init(void) nvdimm_major = rc; nd_class = class_create(THIS_MODULE, "nd"); - if (IS_ERR(nd_class)) + if (IS_ERR(nd_class)) { + rc = PTR_ERR(nd_class); goto err_class; + } return 0; -- cgit From 193ccca43850d2355e7690a93ab9d7d78d38f905 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 30 Jun 2015 16:09:39 -0400 Subject: nfit: fix smatch "use after null check" report drivers/acpi/nfit.c:1224 acpi_nfit_blk_region_enable() error: we previously assumed 'nfit_mem' could be null (see line 1223) drivers/acpi/nfit.c 1222 nfit_mem = nvdimm_provider_data(nvdimm); 1223 if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) { ^^^^^^^^ Check. 1224 dev_dbg(dev, "%s: missing%s%s%s\n", __func__, 1225 nfit_mem ? "" : " nfit_mem", 1226 nfit_mem->dcr ? "" : " dcr", ^^^^^^^^^^^^^ Unchecked dereference. Reported-by: Dan Carpenter Acked-by: Ross Zwisler Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 2161fa178c8d..a20b7c883ca0 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -1223,8 +1223,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) { dev_dbg(dev, "%s: missing%s%s%s\n", __func__, nfit_mem ? "" : " nfit_mem", - nfit_mem->dcr ? "" : " dcr", - nfit_mem->bdw ? "" : " bdw"); + (nfit_mem && nfit_mem->dcr) ? "" : " dcr", + (nfit_mem && nfit_mem->bdw) ? "" : " bdw"); return -ENXIO; } -- cgit From fe7599079b03d521d376da88920cc7b87f71ae25 Mon Sep 17 00:00:00 2001 From: Yang Dongsheng Date: Wed, 3 Jun 2015 14:57:32 +0800 Subject: btrfs: qgroup: allow user to clear the limitation on qgroup Currently, we can only set a limitation on a qgroup, but we can not clear it. This patch provide a choice to user to clear a limitation on qgroup by passing a value of CLEAR_VALUE(-1) to kernel. Reported-by: Tsutomu Itoh Signed-off-by: Dongsheng Yang Tested-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/qgroup.c | 49 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index d5f1f033b7a0..e9ace099162c 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1349,6 +1349,11 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, struct btrfs_root *quota_root; struct btrfs_qgroup *qgroup; int ret = 0; + /* Sometimes we would want to clear the limit on this qgroup. + * To meet this requirement, we treat the -1 as a special value + * which tell kernel to clear the limit on this qgroup. + */ + const u64 CLEAR_VALUE = -1; mutex_lock(&fs_info->qgroup_ioctl_lock); quota_root = fs_info->quota_root; @@ -1364,14 +1369,42 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans, } spin_lock(&fs_info->qgroup_lock); - if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) - qgroup->max_rfer = limit->max_rfer; - if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) - qgroup->max_excl = limit->max_excl; - if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) - qgroup->rsv_rfer = limit->rsv_rfer; - if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) - qgroup->rsv_excl = limit->rsv_excl; + if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_RFER) { + if (limit->max_rfer == CLEAR_VALUE) { + qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; + limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_RFER; + qgroup->max_rfer = 0; + } else { + qgroup->max_rfer = limit->max_rfer; + } + } + if (limit->flags & BTRFS_QGROUP_LIMIT_MAX_EXCL) { + if (limit->max_excl == CLEAR_VALUE) { + qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; + limit->flags &= ~BTRFS_QGROUP_LIMIT_MAX_EXCL; + qgroup->max_excl = 0; + } else { + qgroup->max_excl = limit->max_excl; + } + } + if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_RFER) { + if (limit->rsv_rfer == CLEAR_VALUE) { + qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; + limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_RFER; + qgroup->rsv_rfer = 0; + } else { + qgroup->rsv_rfer = limit->rsv_rfer; + } + } + if (limit->flags & BTRFS_QGROUP_LIMIT_RSV_EXCL) { + if (limit->rsv_excl == CLEAR_VALUE) { + qgroup->lim_flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; + limit->flags &= ~BTRFS_QGROUP_LIMIT_RSV_EXCL; + qgroup->rsv_excl = 0; + } else { + qgroup->rsv_excl = limit->rsv_excl; + } + } qgroup->lim_flags |= limit->flags; spin_unlock(&fs_info->qgroup_lock); -- cgit From 65f5333875d7bbfc13436e224a181d10a80d1ada Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Mon, 8 Jun 2015 20:05:50 +0800 Subject: btrfs: cleanup noused initialization of dev in btrfs_end_bio() It is introduced by: c404e0dc2c843b154f9a36c3aec10d0a715d88eb Btrfs: fix use-after-free in the finishing procedure of the device replace But seems no relationship with that bug, this patch revirt these code block for cleanup. Signed-off-by: Zhao Lei Signed-off-by: Chris Mason --- fs/btrfs/volumes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 83966996aacb..d4cd4059bded 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5715,7 +5715,6 @@ static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int e static void btrfs_end_bio(struct bio *bio, int err) { struct btrfs_bio *bbio = bio->bi_private; - struct btrfs_device *dev = bbio->stripes[0].dev; int is_orig_bio = 0; if (err) { @@ -5723,6 +5722,7 @@ static void btrfs_end_bio(struct bio *bio, int err) if (err == -EIO || err == -EREMOTEIO) { unsigned int stripe_index = btrfs_io_bio(bio)->stripe_index; + struct btrfs_device *dev; BUG_ON(stripe_index >= bbio->num_stripes); dev = bbio->stripes[stripe_index].dev; -- cgit From e82afc52abff07a4acbc90f899598ebafb662831 Mon Sep 17 00:00:00 2001 From: Zhao Lei Date: Fri, 12 Jun 2015 20:36:58 +0800 Subject: btrfs: add error handling for scrub_workers_get() Although it is a rare case, we'd better free previous allocated memory on error. Signed-off-by: Zhao Lei Signed-off-by: Qu Wenruo Signed-off-by: Chris Mason --- fs/btrfs/scrub.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 9f2feabe99f2..94db0fa5225a 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3571,7 +3571,6 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, int is_dev_replace) { - int ret = 0; unsigned int flags = WQ_FREEZABLE | WQ_UNBOUND; int max_active = fs_info->thread_pool_size; @@ -3584,34 +3583,36 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, fs_info->scrub_workers = btrfs_alloc_workqueue("btrfs-scrub", flags, max_active, 4); - if (!fs_info->scrub_workers) { - ret = -ENOMEM; - goto out; - } + if (!fs_info->scrub_workers) + goto fail_scrub_workers; + fs_info->scrub_wr_completion_workers = btrfs_alloc_workqueue("btrfs-scrubwrc", flags, max_active, 2); - if (!fs_info->scrub_wr_completion_workers) { - ret = -ENOMEM; - goto out; - } + if (!fs_info->scrub_wr_completion_workers) + goto fail_scrub_wr_completion_workers; + fs_info->scrub_nocow_workers = btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0); - if (!fs_info->scrub_nocow_workers) { - ret = -ENOMEM; - goto out; - } + if (!fs_info->scrub_nocow_workers) + goto fail_scrub_nocow_workers; fs_info->scrub_parity_workers = btrfs_alloc_workqueue("btrfs-scrubparity", flags, max_active, 2); - if (!fs_info->scrub_parity_workers) { - ret = -ENOMEM; - goto out; - } + if (!fs_info->scrub_parity_workers) + goto fail_scrub_parity_workers; } ++fs_info->scrub_workers_refcnt; -out: - return ret; + return 0; + +fail_scrub_parity_workers: + btrfs_destroy_workqueue(fs_info->scrub_nocow_workers); +fail_scrub_nocow_workers: + btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); +fail_scrub_wr_completion_workers: + btrfs_destroy_workqueue(fs_info->scrub_workers); +fail_scrub_workers: + return -ENOMEM; } static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) -- cgit From 67c5e7d464bc466471b05e027abe8a6b29687ebd Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 11 Jun 2015 00:58:53 +0100 Subject: Btrfs: fix race between balance and unused block group deletion We have a race between deleting an unused block group and balancing the same block group that leads to an assertion failure/BUG(), producing the following trace: [181631.208236] BTRFS: assertion failed: 0, file: fs/btrfs/volumes.c, line: 2622 [181631.220591] ------------[ cut here ]------------ [181631.222959] kernel BUG at fs/btrfs/ctree.h:4062! [181631.223932] invalid opcode: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC [181631.224566] Modules linked in: btrfs dm_flakey dm_mod crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc loop fuse acpi_cpufreq parpor$ [181631.224566] CPU: 8 PID: 17451 Comm: btrfs Tainted: G W 4.1.0-rc5-btrfs-next-10+ #1 [181631.224566] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014 [181631.224566] task: ffff880127e09590 ti: ffff8800b5824000 task.ti: ffff8800b5824000 [181631.224566] RIP: 0010:[] [] assfail.constprop.50+0x1e/0x20 [btrfs] [181631.224566] RSP: 0018:ffff8800b5827ae8 EFLAGS: 00010246 [181631.224566] RAX: 0000000000000040 RBX: ffff8800109fc218 RCX: ffffffff81095dce [181631.224566] RDX: 0000000000005124 RSI: ffffffff81464819 RDI: 00000000ffffffff [181631.224566] RBP: ffff8800b5827ae8 R08: 0000000000000001 R09: 0000000000000000 [181631.224566] R10: 0000000000000000 R11: 0000000000000000 R12: ffff8800109fc200 [181631.224566] R13: ffff880020095000 R14: ffff8800b1a13f38 R15: ffff880020095000 [181631.224566] FS: 00007f70ca0b0c80(0000) GS:ffff88013ec00000(0000) knlGS:0000000000000000 [181631.224566] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [181631.224566] CR2: 00007f2872ab6e68 CR3: 00000000a717c000 CR4: 00000000000006e0 [181631.224566] Stack: [181631.224566] ffff8800b5827ba8 ffffffffa03f3916 ffff8800b5827b38 ffffffffa03d080e [181631.224566] ffffffffa03d1423 ffff880020095000 ffff88001233c000 0000000000000001 [181631.224566] ffff880020095000 ffff8800b1a13f38 0000000a69c00000 0000000000000000 [181631.224566] Call Trace: [181631.224566] [] btrfs_remove_chunk+0xa4/0x6bb [btrfs] [181631.224566] [] ? join_transaction.isra.8+0xb9/0x3ba [btrfs] [181631.224566] [] ? wait_current_trans.isra.13+0x22/0xfc [btrfs] [181631.224566] [] btrfs_relocate_chunk.isra.29+0x8f/0xa7 [btrfs] [181631.224566] [] btrfs_balance+0xaa4/0xc52 [btrfs] [181631.224566] [] btrfs_ioctl_balance+0x23f/0x2b0 [btrfs] [181631.224566] [] ? trace_hardirqs_on+0xd/0xf [181631.224566] [] btrfs_ioctl+0xfe2/0x2220 [btrfs] [181631.224566] [] ? __this_cpu_preempt_check+0x13/0x15 [181631.224566] [] ? arch_local_irq_save+0x9/0xc [181631.224566] [] ? handle_mm_fault+0x834/0xcd2 [181631.224566] [] ? handle_mm_fault+0x834/0xcd2 [181631.224566] [] ? __do_page_fault+0x211/0x424 [181631.224566] [] do_vfs_ioctl+0x3c6/0x479 (...) The sequence of steps leading to this are: CPU 0 CPU 1 btrfs_balance() btrfs_relocate_chunk() btrfs_relocate_block_group(bg X) btrfs_lookup_block_group(bg X) cleaner_kthread locks fs_info->cleaner_mutex btrfs_delete_unused_bgs() finds bg X, which became unused in the previous transaction checks bg X ->ro == 0, so it proceeds sets bg X ->ro to 1 (btrfs_set_block_group_ro(bg X)) blocks on fs_info->cleaner_mutex btrfs_remove_chunk(bg X) unlocks fs_info->cleaner_mutex acquires fs_info->cleaner_mutex relocate_block_group() --> does nothing, no extents found in the extent tree from bg X unlocks fs_info->cleaner_mutex btrfs_relocate_block_group(bg X) returns btrfs_remove_chunk(bg X) extent map not found --> ASSERT(0) Fix this by using a new mutex to make sure these 2 operations, block group relocation and removal, are serialized. This issue is reproducible by running fstests generic/038 (which stresses chunk allocation and automatic removal of unused block groups) together with the following balance loop: while true; do btrfs balance start -dusage=0 ; done Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/disk-io.c | 12 +++++++++++- fs/btrfs/extent-tree.c | 3 +++ fs/btrfs/volumes.c | 48 +++++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 58 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 80a9aefb0c46..aac314e14188 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1778,6 +1778,7 @@ struct btrfs_fs_info { spinlock_t unused_bgs_lock; struct list_head unused_bgs; struct mutex unused_bg_unpin_mutex; + struct mutex delete_unused_bgs_mutex; /* For btrfs to record security options */ struct security_mnt_opts security_opts; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b977fc8d8201..b59deb2c63f4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1772,7 +1772,6 @@ static int cleaner_kthread(void *arg) } btrfs_run_delayed_iputs(root); - btrfs_delete_unused_bgs(root->fs_info); again = btrfs_clean_one_deleted_snapshot(root); mutex_unlock(&root->fs_info->cleaner_mutex); @@ -1781,6 +1780,16 @@ static int cleaner_kthread(void *arg) * needn't do anything special here. */ btrfs_run_defrag_inodes(root->fs_info); + + /* + * Acquires fs_info->delete_unused_bgs_mutex to avoid racing + * with relocation (btrfs_relocate_chunk) and relocation + * acquires fs_info->cleaner_mutex (btrfs_relocate_block_group) + * after acquiring fs_info->delete_unused_bgs_mutex. So we + * can't hold, nor need to, fs_info->cleaner_mutex when deleting + * unused block groups. + */ + btrfs_delete_unused_bgs(root->fs_info); sleep: if (!try_to_freeze() && !again) { set_current_state(TASK_INTERRUPTIBLE); @@ -2492,6 +2501,7 @@ int open_ctree(struct super_block *sb, spin_lock_init(&fs_info->unused_bgs_lock); rwlock_init(&fs_info->tree_mod_log_lock); mutex_init(&fs_info->unused_bg_unpin_mutex); + mutex_init(&fs_info->delete_unused_bgs_mutex); mutex_init(&fs_info->reloc_mutex); mutex_init(&fs_info->delalloc_root_mutex); seqlock_init(&fs_info->profiles_lock); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 38b76cc02f48..1c2bd1723e40 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -9889,6 +9889,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) } spin_unlock(&fs_info->unused_bgs_lock); + mutex_lock(&root->fs_info->delete_unused_bgs_mutex); + /* Don't want to race with allocators so take the groups_sem */ down_write(&space_info->groups_sem); spin_lock(&block_group->lock); @@ -9983,6 +9985,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) end_trans: btrfs_end_transaction(trans, root); next: + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); btrfs_put_block_group(block_group); spin_lock(&fs_info->unused_bgs_lock); } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d4cd4059bded..9b95503ddd00 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2766,6 +2766,20 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, root = root->fs_info->chunk_root; extent_root = root->fs_info->extent_root; + /* + * Prevent races with automatic removal of unused block groups. + * After we relocate and before we remove the chunk with offset + * chunk_offset, automatic removal of the block group can kick in, + * resulting in a failure when calling btrfs_remove_chunk() below. + * + * Make sure to acquire this mutex before doing a tree search (dev + * or chunk trees) to find chunks. Otherwise the cleaner kthread might + * call btrfs_remove_chunk() (through btrfs_delete_unused_bgs()) after + * we release the path used to search the chunk/dev tree and before + * the current task acquires this mutex and calls us. + */ + ASSERT(mutex_is_locked(&root->fs_info->delete_unused_bgs_mutex)); + ret = btrfs_can_relocate(extent_root, chunk_offset); if (ret) return -ENOSPC; @@ -2814,13 +2828,18 @@ again: key.type = BTRFS_CHUNK_ITEM_KEY; while (1) { + mutex_lock(&root->fs_info->delete_unused_bgs_mutex); ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); - if (ret < 0) + if (ret < 0) { + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); goto error; + } BUG_ON(ret == 0); /* Corruption */ ret = btrfs_previous_item(chunk_root, path, key.objectid, key.type); + if (ret) + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); if (ret < 0) goto error; if (ret > 0) @@ -2843,6 +2862,7 @@ again: else BUG_ON(ret); } + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); if (found_key.offset == 0) break; @@ -3299,9 +3319,12 @@ again: goto error; } + mutex_lock(&fs_info->delete_unused_bgs_mutex); ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); - if (ret < 0) + if (ret < 0) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); goto error; + } /* * this shouldn't happen, it means the last relocate @@ -3313,6 +3336,7 @@ again: ret = btrfs_previous_item(chunk_root, path, 0, BTRFS_CHUNK_ITEM_KEY); if (ret) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); ret = 0; break; } @@ -3321,8 +3345,10 @@ again: slot = path->slots[0]; btrfs_item_key_to_cpu(leaf, &found_key, slot); - if (found_key.objectid != key.objectid) + if (found_key.objectid != key.objectid) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); break; + } chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); @@ -3335,10 +3361,13 @@ again: ret = should_balance_chunk(chunk_root, leaf, chunk, found_key.offset); btrfs_release_path(path); - if (!ret) + if (!ret) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); goto loop; + } if (counting) { + mutex_unlock(&fs_info->delete_unused_bgs_mutex); spin_lock(&fs_info->balance_lock); bctl->stat.expected++; spin_unlock(&fs_info->balance_lock); @@ -3348,6 +3377,7 @@ again: ret = btrfs_relocate_chunk(chunk_root, found_key.objectid, found_key.offset); + mutex_unlock(&fs_info->delete_unused_bgs_mutex); if (ret && ret != -ENOSPC) goto error; if (ret == -ENOSPC) { @@ -4087,11 +4117,16 @@ again: key.type = BTRFS_DEV_EXTENT_KEY; do { + mutex_lock(&root->fs_info->delete_unused_bgs_mutex); ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); - if (ret < 0) + if (ret < 0) { + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); goto done; + } ret = btrfs_previous_item(root, path, 0, key.type); + if (ret) + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); if (ret < 0) goto done; if (ret) { @@ -4105,6 +4140,7 @@ again: btrfs_item_key_to_cpu(l, &key, path->slots[0]); if (key.objectid != device->devid) { + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); btrfs_release_path(path); break; } @@ -4113,6 +4149,7 @@ again: length = btrfs_dev_extent_length(l, dev_extent); if (key.offset + length <= new_size) { + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); btrfs_release_path(path); break; } @@ -4122,6 +4159,7 @@ again: btrfs_release_path(path); ret = btrfs_relocate_chunk(root, chunk_objectid, chunk_offset); + mutex_unlock(&root->fs_info->delete_unused_bgs_mutex); if (ret && ret != -ENOSPC) goto done; if (ret == -ENOSPC) -- cgit From c3f4a1685bb87e59c886ee68f7967eae07d4dffa Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 13 Jun 2015 06:52:56 +0100 Subject: Btrfs: use kmem_cache_free when freeing entry in inode cache The free space entries are allocated using kmem_cache_zalloc(), through __btrfs_add_free_space(), therefore we should use kmem_cache_free() and not kfree() to avoid any confusion and any potential problem. Looking at the kfree() definition at mm/slab.c it has the following comment: /* * (...) * * Don't free memory not originally allocated by kmalloc() * or you will run into trouble. */ So better be safe and use kmem_cache_free(). Cc: stable@vger.kernel.org Signed-off-by: Filipe Manana Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/inode-map.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index f6a596d5a637..218df701e607 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -271,7 +271,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) __btrfs_add_free_space(ctl, info->offset, count); free: rb_erase(&info->offset_index, rbroot); - kfree(info); + kmem_cache_free(btrfs_free_space_cachep, info); } } -- cgit From ae9d8f17118551bedd797406a6768b87c2146234 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 13 Jun 2015 06:52:57 +0100 Subject: Btrfs: fix race between caching kthread and returning inode to inode cache While the inode cache caching kthread is calling btrfs_unpin_free_ino(), we could have a concurrent call to btrfs_return_ino() that adds a new entry to the root's free space cache of pinned inodes. This concurrent call does not acquire the fs_info->commit_root_sem before adding a new entry if the caching state is BTRFS_CACHE_FINISHED, which is a problem because the caching kthread calls btrfs_unpin_free_ino() after setting the caching state to BTRFS_CACHE_FINISHED and therefore races with the task calling btrfs_return_ino(), which is adding a new entry, while the former (caching kthread) is navigating the cache's rbtree, removing and freeing nodes from the cache's rbtree without acquiring the spinlock that protects the rbtree. This race resulted in memory corruption due to double free of struct btrfs_free_space objects because both tasks can end up doing freeing the same objects. Note that adding a new entry can result in merging it with other entries in the cache, in which case those entries are freed. This is particularly important as btrfs_free_space structures are also used for the block group free space caches. This memory corruption can be detected by a debugging kernel, which reports it with the following trace: [132408.501148] slab error in verify_redzone_free(): cache `btrfs_free_space': double free detected [132408.505075] CPU: 15 PID: 12248 Comm: btrfs-ino-cache Tainted: G W 4.1.0-rc5-btrfs-next-10+ #1 [132408.505075] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014 [132408.505075] ffff880023e7d320 ffff880163d73cd8 ffffffff8145eec7 ffffffff81095dce [132408.505075] ffff880009735d40 ffff880163d73ce8 ffffffff81154e1e ffff880163d73d68 [132408.505075] ffffffff81155733 ffffffffa054a95a ffff8801b6099f00 ffffffffa0505b5f [132408.505075] Call Trace: [132408.505075] [] dump_stack+0x4f/0x7b [132408.505075] [] ? console_unlock+0x356/0x3a2 [132408.505075] [] __slab_error.isra.28+0x25/0x36 [132408.505075] [] __cache_free+0xe2/0x4b6 [132408.505075] [] ? __btrfs_add_free_space+0x2f0/0x343 [btrfs] [132408.505075] [] ? btrfs_unpin_free_ino+0x8e/0x99 [btrfs] [132408.505075] [] ? time_hardirqs_off+0x15/0x28 [132408.505075] [] ? trace_hardirqs_off+0xd/0xf [132408.505075] [] ? kfree+0xb6/0x14e [132408.505075] [] kfree+0xe5/0x14e [132408.505075] [] btrfs_unpin_free_ino+0x8e/0x99 [btrfs] [132408.505075] [] caching_kthread+0x29e/0x2d9 [btrfs] [132408.505075] [] ? btrfs_unpin_free_ino+0x99/0x99 [btrfs] [132408.505075] [] kthread+0xef/0xf7 [132408.505075] [] ? time_hardirqs_on+0x15/0x28 [132408.505075] [] ? __kthread_parkme+0xad/0xad [132408.505075] [] ret_from_fork+0x42/0x70 [132408.505075] [] ? __kthread_parkme+0xad/0xad [132408.505075] ffff880023e7d320: redzone 1:0x9f911029d74e35b, redzone 2:0x9f911029d74e35b. [132409.501654] slab: double free detected in cache 'btrfs_free_space', objp ffff880023e7d320 [132409.503355] ------------[ cut here ]------------ [132409.504241] kernel BUG at mm/slab.c:2571! Therefore fix this by having btrfs_unpin_free_ino() acquire the lock that protects the rbtree while doing the searches and removing entries. Fixes: 1c70d8fb4dfa ("Btrfs: fix inode caching vs tree log") Cc: stable@vger.kernel.org Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/inode-map.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c index 218df701e607..d4a582ac3f73 100644 --- a/fs/btrfs/inode-map.c +++ b/fs/btrfs/inode-map.c @@ -246,6 +246,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) { struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; struct rb_root *rbroot = &root->free_ino_pinned->free_space_offset; + spinlock_t *rbroot_lock = &root->free_ino_pinned->tree_lock; struct btrfs_free_space *info; struct rb_node *n; u64 count; @@ -254,23 +255,29 @@ void btrfs_unpin_free_ino(struct btrfs_root *root) return; while (1) { + bool add_to_ctl = true; + + spin_lock(rbroot_lock); n = rb_first(rbroot); - if (!n) + if (!n) { + spin_unlock(rbroot_lock); break; + } info = rb_entry(n, struct btrfs_free_space, offset_index); BUG_ON(info->bitmap); /* Logic error */ if (info->offset > root->ino_cache_progress) - goto free; + add_to_ctl = false; else if (info->offset + info->bytes > root->ino_cache_progress) count = root->ino_cache_progress - info->offset + 1; else count = info->bytes; - __btrfs_add_free_space(ctl, info->offset, count); -free: rb_erase(&info->offset_index, rbroot); + spin_unlock(rbroot_lock); + if (add_to_ctl) + __btrfs_add_free_space(ctl, info->offset, count); kmem_cache_free(btrfs_free_space_cachep, info); } } -- cgit From da288d280d16f4d7e4ada331cb33d381b408b10c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 13 Jun 2015 06:55:31 +0100 Subject: Btrfs: fix crash on close_ctree() if cleaner starts new transaction Often when running fstests btrfs/079 I was running into the following trace during umount on one of my qemu/kvm test vms: [ 8245.682441] WARNING: CPU: 8 PID: 25064 at fs/btrfs/extent-tree.c:138 btrfs_put_block_group+0x51/0x69 [btrfs]() [ 8245.685039] Modules linked in: btrfs dm_flakey dm_mod crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc loop fuse parport_pc i2c_piix4 acpi_cpufreq processor psmouse i2c_core thermal_sys parport evdev serio_raw button pcspkr microcode ext4 crc16 jbd2 mbcache sg sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix libata floppy virtio_pci virtio_ring scsi_mod virtio e1000 [last unloaded: btrfs] [ 8245.693860] CPU: 8 PID: 25064 Comm: umount Tainted: G W 4.1.0-rc5-btrfs-next-10+ #1 [ 8245.695081] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014 [ 8245.697583] 0000000000000009 ffff88020d047ce8 ffffffff8145eec7 ffffffff81095dce [ 8245.699234] 0000000000000000 ffff88020d047d28 ffffffff8104b399 0000000000000028 [ 8245.700995] ffffffffa04db07b ffff8801c6036c00 ffff8801c6036d68 ffff880202eb40b0 [ 8245.702510] Call Trace: [ 8245.703006] [] dump_stack+0x4f/0x7b [ 8245.705393] [] ? console_unlock+0x356/0x3a2 [ 8245.706569] [] warn_slowpath_common+0xa1/0xbb [ 8245.707747] [] ? btrfs_put_block_group+0x51/0x69 [btrfs] [ 8245.709101] [] warn_slowpath_null+0x1a/0x1c [ 8245.710274] [] btrfs_put_block_group+0x51/0x69 [btrfs] [ 8245.711823] [] btrfs_free_block_groups+0x145/0x322 [btrfs] [ 8245.713251] [] close_ctree+0x1ef/0x325 [btrfs] [ 8245.714448] [] ? evict_inodes+0xdc/0xeb [ 8245.715539] [] btrfs_put_super+0x19/0x1b [btrfs] [ 8245.716835] [] generic_shutdown_super+0x73/0xef [ 8245.718015] [] kill_anon_super+0x13/0x1e [ 8245.719101] [] btrfs_kill_super+0x17/0x23 [btrfs] [ 8245.720316] [] deactivate_locked_super+0x3b/0x68 [ 8245.721517] [] deactivate_super+0x3f/0x43 [ 8245.722581] [] cleanup_mnt+0x59/0x78 [ 8245.723538] [] __cleanup_mnt+0x12/0x14 [ 8245.724572] [] task_work_run+0x8f/0xbc [ 8245.725598] [] do_notify_resume+0x45/0x53 [ 8245.726892] [] int_signal+0x12/0x17 [ 8245.737887] ---[ end trace a01d038397e99b92 ]--- [ 8245.769363] general protection fault: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC [ 8245.770737] Modules linked in: btrfs dm_flakey dm_mod crc32c_generic xor raid6_pq nfsd auth_rpcgss oid_registry nfs_acl nfs lockd grace fscache sunrpc loop fuse parport_pc i2c_piix4 acpi_cpufreq processor psmouse i2c_core thermal_sys parport evdev serio_raw button pcspkr microcode ext4 crc16 jbd2 mbcache sg sr_mod cdrom sd_mod ata_generic virtio_scsi ata_piix libata floppy virtio_pci virtio_ring scsi_mod virtio e1000 [last unloaded: btrfs] [ 8245.772641] CPU: 2 PID: 25064 Comm: umount Tainted: G W 4.1.0-rc5-btrfs-next-10+ #1 [ 8245.772641] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.8.1-0-g4adadbd-20150316_085822-nilsson.home.kraxel.org 04/01/2014 [ 8245.772641] task: ffff880013005810 ti: ffff88020d044000 task.ti: ffff88020d044000 [ 8245.772641] RIP: 0010:[] [] btrfs_queue_work+0x2c/0x14d [btrfs] [ 8245.772641] RSP: 0018:ffff88020d0478b8 EFLAGS: 00010202 [ 8245.772641] RAX: 0000000000000004 RBX: 6b6b6b6b6b6b6b6b RCX: ffffffffa0581488 [ 8245.772641] RDX: 0000000000000000 RSI: ffff880194b7bf48 RDI: ffff880144b6a7a0 [ 8245.772641] RBP: ffff88020d0478d8 R08: 0000000000000000 R09: 000000000000ffff [ 8245.772641] R10: 0000000000000004 R11: 0000000000000005 R12: ffff880194b7bf48 [ 8245.772641] R13: ffff880194b7bf48 R14: 0000000000000410 R15: 0000000000000000 [ 8245.772641] FS: 00007f991e77d840(0000) GS:ffff88023e280000(0000) knlGS:0000000000000000 [ 8245.772641] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 8245.772641] CR2: 00007fbbd325ee68 CR3: 000000021de8e000 CR4: 00000000000006e0 [ 8245.772641] Stack: [ 8245.772641] ffff880194b7bf00 ffff880202eb4000 ffff880194b7bf48 0000000000000410 [ 8245.772641] ffff88020d047958 ffffffffa04ec6d5 ffff8801629b2ee8 0000000082987570 [ 8245.772641] 0000000000a5813f 0000000000000001 ffff880013006100 0000000000000002 [ 8245.772641] Call Trace: [ 8245.772641] [] btrfs_wq_submit_bio+0xe1/0x17b [btrfs] [ 8245.772641] [] ? check_irq_usage+0x76/0x87 [ 8245.772641] [] btree_submit_bio_hook+0xb6/0xd9 [btrfs] [ 8245.772641] [] ? btree_csum_one_bio+0xad/0xad [btrfs] [ 8245.772641] [] ? btree_io_failed_hook+0x5e/0x5e [btrfs] [ 8245.772641] [] submit_one_bio+0x8c/0xc7 [btrfs] [ 8245.772641] [] submit_extent_page.isra.18+0x9d/0x186 [btrfs] [ 8245.772641] [] write_one_eb+0x117/0x1ae [btrfs] [ 8245.772641] [] ? end_extent_buffer_writeback+0x21/0x21 [btrfs] [ 8245.772641] [] btree_write_cache_pages+0x2ab/0x385 [btrfs] [ 8245.772641] [] btree_writepages+0x23/0x5c [btrfs] [ 8245.772641] [] do_writepages+0x23/0x2c [ 8245.772641] [] __writeback_single_inode+0xda/0x5bd [ 8245.772641] [] ? writeback_single_inode+0x2b/0x173 [ 8245.772641] [] writeback_single_inode+0xc8/0x173 [ 8245.772641] [] write_inode_now+0x8a/0x95 [ 8245.772641] [] ? _atomic_dec_and_lock+0x30/0x4e [ 8245.772641] [] iput+0x17d/0x26a [ 8245.772641] [] close_ctree+0x22a/0x325 [btrfs] [ 8245.772641] [] ? evict_inodes+0xdc/0xeb [ 8245.772641] [] btrfs_put_super+0x19/0x1b [btrfs] [ 8245.772641] [] generic_shutdown_super+0x73/0xef [ 8245.772641] [] kill_anon_super+0x13/0x1e [ 8245.772641] [] btrfs_kill_super+0x17/0x23 [btrfs] [ 8245.772641] [] deactivate_locked_super+0x3b/0x68 [ 8245.772641] [] deactivate_super+0x3f/0x43 [ 8245.772641] [] cleanup_mnt+0x59/0x78 [ 8245.772641] [] __cleanup_mnt+0x12/0x14 [ 8245.772641] [] task_work_run+0x8f/0xbc [ 8245.772641] [] do_notify_resume+0x45/0x53 [ 8245.772641] [] int_signal+0x12/0x17 [ 8245.772641] Code: 1f 44 00 00 55 48 89 e5 41 56 41 55 41 54 53 49 89 f4 48 8b 46 70 a8 04 74 09 48 8b 5f 08 48 85 db 75 03 48 8b 1f 49 89 5c 24 68 <83> 7b 5c ff 74 04 f0 ff 43 50 49 83 7c 24 08 00 74 2c 4c 8d 6b [ 8245.772641] RIP [] btrfs_queue_work+0x2c/0x14d [btrfs] [ 8245.772641] RSP [ 8245.845040] ---[ end trace a01d038397e99b93 ]--- For logical reasons such as the phase of the moon, this happened more often with "-o inode_cache" than without any mount options. After some debugging it turned out to be simple to understand what was happening: 1) close_ctree() is called; 2) It then stops the transaction kthread, which commits the current transaction; 3) It asks the cleaner kthread to stop, which is currently running btrfs_delete_unused_bgs(); 4) btrfs_delete_unused_bgs() finds an unused block group, starts a new transaction, deletes the block group, which implies COWing some tree nodes and leafs and dirtying their respective pages, and then finally it ends the transaction it started, without committing it; 5) The cleaner kthread stops; 6) close_ctree() releases (from memory) the block group objects, which produces the warning in the trace pasted above; 7) Then it invalidates all pages of the btree inode, by calling invalidate_inode_pages2(), which waits for any pages under writeback, and releases any non-dirty pages; 8) All work queues are destroyed (waiting first for their current tasks to finish execution); 9) A final iput() is called against the btree inode; 10) This iput triggers a writeback of the btree inode because it still has dirty pages; 11) This starts the whole chain of callbacks for the btree inode until it eventually reaches btrfs_wq_submit_bio() where it leads to a NULL pointer dereference because the work queues were already destroyed. Fix this by making the cleaner commit any transaction that it started after the transaction kthread was stopped. Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/disk-io.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b59deb2c63f4..e5aad7f535aa 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1751,6 +1751,7 @@ static int cleaner_kthread(void *arg) { struct btrfs_root *root = arg; int again; + struct btrfs_trans_handle *trans; do { again = 0; @@ -1798,6 +1799,34 @@ sleep: __set_current_state(TASK_RUNNING); } } while (!kthread_should_stop()); + + /* + * Transaction kthread is stopped before us and wakes us up. + * However we might have started a new transaction and COWed some + * tree blocks when deleting unused block groups for example. So + * make sure we commit the transaction we started to have a clean + * shutdown when evicting the btree inode - if it has dirty pages + * when we do the final iput() on it, eviction will trigger a + * writeback for it which will fail with null pointer dereferences + * since work queues and other resources were already released and + * destroyed by the time the iput/eviction/writeback is made. + */ + trans = btrfs_attach_transaction(root); + if (IS_ERR(trans)) { + if (PTR_ERR(trans) != -ENOENT) + btrfs_err(root->fs_info, + "cleaner transaction attach returned %ld", + PTR_ERR(trans)); + } else { + int ret; + + ret = btrfs_commit_transaction(trans, root); + if (ret) + btrfs_err(root->fs_info, + "cleaner open transaction commit returned %d", + ret); + } + return 0; } -- cgit From e4545de5b035c7debb73d260c78377dbb69cbfb5 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 17 Jun 2015 12:49:23 +0100 Subject: Btrfs: fix fsync data loss after append write If we do an append write to a file (which increases its inode's i_size) that does not have the flag BTRFS_INODE_NEEDS_FULL_SYNC set in its inode, and the previous transaction added a new hard link to the file, which sets the flag BTRFS_INODE_COPY_EVERYTHING in the file's inode, and then fsync the file, the inode's new i_size isn't logged. This has the consequence that after the fsync log is replayed, the file size remains what it was before the append write operation, which means users/applications will not be able to read the data that was successsfully fsync'ed before. This happens because neither the inode item nor the delayed inode get their i_size updated when the append write is made - doing so would require starting a transaction in the buffered write path, something that we do not do intentionally for performance reasons. Fix this by making sure that when the flag BTRFS_INODE_COPY_EVERYTHING is set the inode is logged with its current i_size (log the in-memory inode into the log tree). This issue is not a recent regression and is easy to reproduce with the following test case for fstests: seq=`basename $0` seqres=$RESULT_DIR/$seq echo "QA output created by $seq" here=`pwd` tmp=/tmp/$$ status=1 # failure is the default! _cleanup() { _cleanup_flakey rm -f $tmp.* } trap "_cleanup; exit \$status" 0 1 2 3 15 # get standard environment, filters and checks . ./common/rc . ./common/filter . ./common/dmflakey # real QA test starts here _supported_fs generic _supported_os Linux _need_to_be_root _require_scratch _require_dm_flakey _require_metadata_journaling $SCRATCH_DEV _crash_and_mount() { # Simulate a crash/power loss. _load_flakey_table $FLAKEY_DROP_WRITES _unmount_flakey # Allow writes again and mount. This makes the fs replay its fsync log. _load_flakey_table $FLAKEY_ALLOW_WRITES _mount_flakey } rm -f $seqres.full _scratch_mkfs >> $seqres.full 2>&1 _init_flakey _mount_flakey # Create the test file with some initial data and then fsync it. # The fsync here is only needed to trigger the issue in btrfs, as it causes the # the flag BTRFS_INODE_NEEDS_FULL_SYNC to be removed from the btrfs inode. $XFS_IO_PROG -f -c "pwrite -S 0xaa 0 32k" \ -c "fsync" \ $SCRATCH_MNT/foo | _filter_xfs_io sync # Add a hard link to our file. # On btrfs this sets the flag BTRFS_INODE_COPY_EVERYTHING on the btrfs inode, # which is a necessary condition to trigger the issue. ln $SCRATCH_MNT/foo $SCRATCH_MNT/bar # Sync the filesystem to force a commit of the current btrfs transaction, this # is a necessary condition to trigger the bug on btrfs. sync # Now append more data to our file, increasing its size, and fsync the file. # In btrfs because the inode flag BTRFS_INODE_COPY_EVERYTHING was set and the # write path did not update the inode item in the btree nor the delayed inode # item (in memory struture) in the current transaction (created by the fsync # handler), the fsync did not record the inode's new i_size in the fsync # log/journal. This made the data unavailable after the fsync log/journal is # replayed. $XFS_IO_PROG -c "pwrite -S 0xbb 32K 32K" \ -c "fsync" \ $SCRATCH_MNT/foo | _filter_xfs_io echo "File content after fsync and before crash:" od -t x1 $SCRATCH_MNT/foo _crash_and_mount echo "File content after crash and log replay:" od -t x1 $SCRATCH_MNT/foo status=0 exit The expected file output before and after the crash/power failure expects the appended data to be available, which is: 0000000 aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa * 0100000 bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb * 0200000 Cc: stable@vger.kernel.org Signed-off-by: Filipe Manana Reviewed-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 1ce80c1c4eb6..76c4f9d1b80a 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4155,6 +4155,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, u64 ino = btrfs_ino(inode); struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; u64 logged_isize = 0; + bool need_log_inode_item = true; path = btrfs_alloc_path(); if (!path) @@ -4263,11 +4264,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, } else { if (inode_only == LOG_INODE_ALL) fast_search = true; - ret = log_inode_item(trans, log, dst_path, inode); - if (ret) { - err = ret; - goto out_unlock; - } goto log_extents; } @@ -4290,6 +4286,9 @@ again: if (min_key.type > max_key.type) break; + if (min_key.type == BTRFS_INODE_ITEM_KEY) + need_log_inode_item = false; + src = path->nodes[0]; if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { ins_nr++; @@ -4360,6 +4359,11 @@ next_slot: log_extents: btrfs_release_path(path); btrfs_release_path(dst_path); + if (need_log_inode_item) { + err = log_inode_item(trans, log, dst_path, inode); + if (err) + goto out_unlock; + } if (fast_search) { /* * Some ordered extents started by fsync might have completed -- cgit From 36283bf777d963fac099213297e155d071096994 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Sat, 20 Jun 2015 00:44:51 +0100 Subject: Btrfs: fix fsync xattr loss in the fast fsync path After commit 4f764e515361 ("Btrfs: remove deleted xattrs on fsync log replay"), we can end up in a situation where during log replay we end up deleting xattrs that were never deleted when their file was last fsynced. This happens in the fast fsync path (flag BTRFS_INODE_NEEDS_FULL_SYNC is not set in the inode) if the inode has the flag BTRFS_INODE_COPY_EVERYTHING set, the xattr was added in a past transaction and the leaf where the xattr is located was not updated (COWed or created) in the current transaction. In this scenario the xattr item never ends up in the log tree and therefore at log replay time, which makes the replay code delete the xattr from the fs/subvol tree as it thinks that xattr was deleted prior to the last fsync. Fix this by always logging all xattrs, which is the simplest and most reliable way to detect deleted xattrs and replay the deletes at log replay time. This issue is reproducible with the following test case for fstests: seq=`basename $0` seqres=$RESULT_DIR/$seq echo "QA output created by $seq" here=`pwd` tmp=/tmp/$$ status=1 # failure is the default! _cleanup() { _cleanup_flakey rm -f $tmp.* } trap "_cleanup; exit \$status" 0 1 2 3 15 # get standard environment, filters and checks . ./common/rc . ./common/filter . ./common/dmflakey . ./common/attr # real QA test starts here # We create a lot of xattrs for a single file. Only btrfs and xfs are currently # able to store such a large mount of xattrs per file, other filesystems such # as ext3/4 and f2fs for example, fail with ENOSPC even if we attempt to add # less than 1000 xattrs with very small values. _supported_fs btrfs xfs _supported_os Linux _need_to_be_root _require_scratch _require_dm_flakey _require_attrs _require_metadata_journaling $SCRATCH_DEV rm -f $seqres.full _scratch_mkfs >> $seqres.full 2>&1 _init_flakey _mount_flakey # Create the test file with some initial data and make sure everything is # durably persisted. $XFS_IO_PROG -f -c "pwrite -S 0xaa 0 32k" $SCRATCH_MNT/foo | _filter_xfs_io sync # Add many small xattrs to our file. # We create such a large amount because it's needed to trigger the issue found # in btrfs - we need to have an amount that causes the fs to have at least 3 # btree leafs with xattrs stored in them, and it must work on any leaf size # (maximum leaf/node size is 64Kb). num_xattrs=2000 for ((i = 1; i <= $num_xattrs; i++)); do name="user.attr_$(printf "%04d" $i)" $SETFATTR_PROG -n $name -v "val_$(printf "%04d" $i)" $SCRATCH_MNT/foo done # Sync the filesystem to force a commit of the current btrfs transaction, this # is a necessary condition to trigger the bug on btrfs. sync # Now update our file's data and fsync the file. # After a successful fsync, if the fsync log/journal is replayed we expect to # see all the xattrs we added before with the same values (and the updated file # data of course). Btrfs used to delete some of these xattrs when it replayed # its fsync log/journal. $XFS_IO_PROG -c "pwrite -S 0xbb 8K 16K" \ -c "fsync" \ $SCRATCH_MNT/foo | _filter_xfs_io # Simulate a crash/power loss. _load_flakey_table $FLAKEY_DROP_WRITES _unmount_flakey # Allow writes again and mount. This makes the fs replay its fsync log. _load_flakey_table $FLAKEY_ALLOW_WRITES _mount_flakey echo "File content after crash and log replay:" od -t x1 $SCRATCH_MNT/foo echo "File xattrs after crash and log replay:" for ((i = 1; i <= $num_xattrs; i++)); do name="user.attr_$(printf "%04d" $i)" echo -n "$name=" $GETFATTR_PROG --absolute-names -n $name --only-values $SCRATCH_MNT/foo echo done status=0 exit The golden output expects all xattrs to be available, and with the correct values, after the fsync log is replayed. Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 76c4f9d1b80a..66f87156882f 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4117,6 +4117,86 @@ static int logged_inode_size(struct btrfs_root *log, struct inode *inode, return 0; } +/* + * At the moment we always log all xattrs. This is to figure out at log replay + * time which xattrs must have their deletion replayed. If a xattr is missing + * in the log tree and exists in the fs/subvol tree, we delete it. This is + * because if a xattr is deleted, the inode is fsynced and a power failure + * happens, causing the log to be replayed the next time the fs is mounted, + * we want the xattr to not exist anymore (same behaviour as other filesystems + * with a journal, ext3/4, xfs, f2fs, etc). + */ +static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + struct btrfs_path *path, + struct btrfs_path *dst_path) +{ + int ret; + struct btrfs_key key; + const u64 ino = btrfs_ino(inode); + int ins_nr = 0; + int start_slot = 0; + + key.objectid = ino; + key.type = BTRFS_XATTR_ITEM_KEY; + key.offset = 0; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + return ret; + + while (true) { + int slot = path->slots[0]; + struct extent_buffer *leaf = path->nodes[0]; + int nritems = btrfs_header_nritems(leaf); + + if (slot >= nritems) { + if (ins_nr > 0) { + u64 last_extent = 0; + + ret = copy_items(trans, inode, dst_path, path, + &last_extent, start_slot, + ins_nr, 1, 0); + /* can't be 1, extent items aren't processed */ + ASSERT(ret <= 0); + if (ret < 0) + return ret; + ins_nr = 0; + } + ret = btrfs_next_leaf(root, path); + if (ret < 0) + return ret; + else if (ret > 0) + break; + continue; + } + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid != ino || key.type != BTRFS_XATTR_ITEM_KEY) + break; + + if (ins_nr == 0) + start_slot = slot; + ins_nr++; + path->slots[0]++; + cond_resched(); + } + if (ins_nr > 0) { + u64 last_extent = 0; + + ret = copy_items(trans, inode, dst_path, path, + &last_extent, start_slot, + ins_nr, 1, 0); + /* can't be 1, extent items aren't processed */ + ASSERT(ret <= 0); + if (ret < 0) + return ret; + } + + return 0; +} + /* log a single inode in the tree log. * At least one parent directory for this inode must exist in the tree * or be logged already. @@ -4289,6 +4369,25 @@ again: if (min_key.type == BTRFS_INODE_ITEM_KEY) need_log_inode_item = false; + /* Skip xattrs, we log them later with btrfs_log_all_xattrs() */ + if (min_key.type == BTRFS_XATTR_ITEM_KEY) { + if (ins_nr == 0) + goto next_slot; + ret = copy_items(trans, inode, dst_path, path, + &last_extent, ins_start_slot, + ins_nr, inode_only, logged_isize); + if (ret < 0) { + err = ret; + goto out_unlock; + } + ins_nr = 0; + if (ret) { + btrfs_release_path(path); + continue; + } + goto next_slot; + } + src = path->nodes[0]; if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) { ins_nr++; @@ -4356,6 +4455,11 @@ next_slot: ins_nr = 0; } + btrfs_release_path(path); + btrfs_release_path(dst_path); + err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path); + if (err) + goto out_unlock; log_extents: btrfs_release_path(path); btrfs_release_path(dst_path); -- cgit From a89ca6f24ffe435edad57de02eaabd37a2c6bff6 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 25 Jun 2015 04:17:46 +0100 Subject: Btrfs: fix fsync after truncate when no_holes feature is enabled When we have the no_holes feature enabled, if a we truncate a file to a smaller size, truncate it again but to a size greater than or equals to its original size and fsync it, the log tree will not have any information about the hole covering the range [truncate_1_offset, new_file_size[. Which means if the fsync log is replayed, the file will remain with the state it had before both truncate operations. Without the no_holes feature this does not happen, since when the inode is logged (full sync flag is set) it will find in the fs/subvol tree a leaf with a generation matching the current transaction id that has an explicit extent item representing the hole. Fix this by adding an explicit extent item representing a hole between the last extent and the inode's i_size if we are doing a full sync. The issue is easy to reproduce with the following test case for fstests: . ./common/rc . ./common/filter . ./common/dmflakey _need_to_be_root _supported_fs generic _supported_os Linux _require_scratch _require_dm_flakey # This test was motivated by an issue found in btrfs when the btrfs # no-holes feature is enabled (introduced in kernel 3.14). So enable # the feature if the fs being tested is btrfs. if [ $FSTYP == "btrfs" ]; then _require_btrfs_fs_feature "no_holes" _require_btrfs_mkfs_feature "no-holes" MKFS_OPTIONS="$MKFS_OPTIONS -O no-holes" fi rm -f $seqres.full _scratch_mkfs >>$seqres.full 2>&1 _init_flakey _mount_flakey # Create our test files and make sure everything is durably persisted. $XFS_IO_PROG -f -c "pwrite -S 0xaa 0 64K" \ -c "pwrite -S 0xbb 64K 61K" \ $SCRATCH_MNT/foo | _filter_xfs_io $XFS_IO_PROG -f -c "pwrite -S 0xee 0 64K" \ -c "pwrite -S 0xff 64K 61K" \ $SCRATCH_MNT/bar | _filter_xfs_io sync # Now truncate our file foo to a smaller size (64Kb) and then truncate # it to the size it had before the shrinking truncate (125Kb). Then # fsync our file. If a power failure happens after the fsync, we expect # our file to have a size of 125Kb, with the first 64Kb of data having # the value 0xaa and the second 61Kb of data having the value 0x00. $XFS_IO_PROG -c "truncate 64K" \ -c "truncate 125K" \ -c "fsync" \ $SCRATCH_MNT/foo # Do something similar to our file bar, but the first truncation sets # the file size to 0 and the second truncation expands the size to the # double of what it was initially. $XFS_IO_PROG -c "truncate 0" \ -c "truncate 253K" \ -c "fsync" \ $SCRATCH_MNT/bar _load_flakey_table $FLAKEY_DROP_WRITES _unmount_flakey # Allow writes again, mount to trigger log replay and validate file # contents. _load_flakey_table $FLAKEY_ALLOW_WRITES _mount_flakey # We expect foo to have a size of 125Kb, the first 64Kb of data all # having the value 0xaa and the remaining 61Kb to be a hole (all bytes # with value 0x00). echo "File foo content after log replay:" od -t x1 $SCRATCH_MNT/foo # We expect bar to have a size of 253Kb and no extents (any byte read # from bar has the value 0x00). echo "File bar content after log replay:" od -t x1 $SCRATCH_MNT/bar status=0 exit The expected file contents in the golden output are: File foo content after log replay: 0000000 aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa * 0200000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 0372000 File bar content after log replay: 0000000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 0772000 Without this fix, their contents are: File foo content after log replay: 0000000 aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa aa * 0200000 bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb bb * 0372000 File bar content after log replay: 0000000 ee ee ee ee ee ee ee ee ee ee ee ee ee ee ee ee * 0200000 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff * 0372000 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 * 0772000 A test case submission for fstests follows soon. Signed-off-by: Filipe Manana Reviewed-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/tree-log.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 66f87156882f..9c45431e69ab 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4197,6 +4197,107 @@ static int btrfs_log_all_xattrs(struct btrfs_trans_handle *trans, return 0; } +/* + * If the no holes feature is enabled we need to make sure any hole between the + * last extent and the i_size of our inode is explicitly marked in the log. This + * is to make sure that doing something like: + * + * 1) create file with 128Kb of data + * 2) truncate file to 64Kb + * 3) truncate file to 256Kb + * 4) fsync file + * 5) + * 6) mount fs and trigger log replay + * + * Will give us a file with a size of 256Kb, the first 64Kb of data match what + * the file had in its first 64Kb of data at step 1 and the last 192Kb of the + * file correspond to a hole. The presence of explicit holes in a log tree is + * what guarantees that log replay will remove/adjust file extent items in the + * fs/subvol tree. + * + * Here we do not need to care about holes between extents, that is already done + * by copy_items(). We also only need to do this in the full sync path, where we + * lookup for extents from the fs/subvol tree only. In the fast path case, we + * lookup the list of modified extent maps and if any represents a hole, we + * insert a corresponding extent representing a hole in the log tree. + */ +static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct inode *inode, + struct btrfs_path *path) +{ + int ret; + struct btrfs_key key; + u64 hole_start; + u64 hole_size; + struct extent_buffer *leaf; + struct btrfs_root *log = root->log_root; + const u64 ino = btrfs_ino(inode); + const u64 i_size = i_size_read(inode); + + if (!btrfs_fs_incompat(root->fs_info, NO_HOLES)) + return 0; + + key.objectid = ino; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = (u64)-1; + + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + ASSERT(ret != 0); + if (ret < 0) + return ret; + + ASSERT(path->slots[0] > 0); + path->slots[0]--; + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + + if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) { + /* inode does not have any extents */ + hole_start = 0; + hole_size = i_size; + } else { + struct btrfs_file_extent_item *extent; + u64 len; + + /* + * If there's an extent beyond i_size, an explicit hole was + * already inserted by copy_items(). + */ + if (key.offset >= i_size) + return 0; + + extent = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + + if (btrfs_file_extent_type(leaf, extent) == + BTRFS_FILE_EXTENT_INLINE) { + len = btrfs_file_extent_inline_len(leaf, + path->slots[0], + extent); + ASSERT(len == i_size); + return 0; + } + + len = btrfs_file_extent_num_bytes(leaf, extent); + /* Last extent goes beyond i_size, no need to log a hole. */ + if (key.offset + len > i_size) + return 0; + hole_start = key.offset + len; + hole_size = i_size - hole_start; + } + btrfs_release_path(path); + + /* Last extent ends at i_size. */ + if (hole_size == 0) + return 0; + + hole_size = ALIGN(hole_size, root->sectorsize); + ret = btrfs_insert_file_extent(trans, log, ino, hole_start, 0, 0, + hole_size, 0, hole_size, 0, 0, 0); + return ret; +} + /* log a single inode in the tree log. * At least one parent directory for this inode must exist in the tree * or be logged already. @@ -4460,6 +4561,13 @@ next_slot: err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path); if (err) goto out_unlock; + if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { + btrfs_release_path(path); + btrfs_release_path(dst_path); + err = btrfs_log_trailing_hole(trans, root, inode, path); + if (err) + goto out_unlock; + } log_extents: btrfs_release_path(path); btrfs_release_path(dst_path); -- cgit From 207910ddeeda38fd54544d94f8c8ca5a9632cc25 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 30 Jun 2015 14:42:04 -0700 Subject: btrfs: pass unaligned length to btrfs_cmp_data() In the case that we dedupe the tail of a file, we might expand the dedupe len out to the end of our last block. We don't want to compare data past i_size however, so pass the original length to btrfs_cmp_data(). Signed-off-by: Mark Fasheh Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index c86b835da7a8..55504338491d 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2943,7 +2943,8 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, goto out_unlock; } - ret = btrfs_cmp_data(src, loff, dst, dst_loff, len); + /* pass original length for comparison so we stay within i_size */ + ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen); if (ret == 0) ret = btrfs_clone(src, dst, loff, olen, len, dst_loff); -- cgit From f441460202cb787c49963bcc1f54cb48c52f7512 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 30 Jun 2015 14:42:05 -0700 Subject: btrfs: fix deadlock with extent-same and readpage ->readpage() does page_lock() before extent_lock(), we do the opposite in extent-same. We want to reverse the order in btrfs_extent_same() but it's not quite straightforward since the page locks are taken inside btrfs_cmp_data(). So I split btrfs_cmp_data() into 3 parts with a small context structure that is passed between them. The first, btrfs_cmp_data_prepare() gathers up the pages needed (taking page lock as required) and puts them on our context structure. At this point, we are safe to lock the extent range. Afterwards, we use btrfs_cmp_data() to do the data compare as usual and btrfs_cmp_data_free() to clean up our context. Signed-off-by: Mark Fasheh Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 148 +++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 117 insertions(+), 31 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 55504338491d..9ebe2dd31f2a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2765,14 +2765,11 @@ out: return ret; } -static struct page *extent_same_get_page(struct inode *inode, u64 off) +static struct page *extent_same_get_page(struct inode *inode, pgoff_t index) { struct page *page; - pgoff_t index; struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; - index = off >> PAGE_CACHE_SHIFT; - page = grab_cache_page(inode->i_mapping, index); if (!page) return NULL; @@ -2793,6 +2790,20 @@ static struct page *extent_same_get_page(struct inode *inode, u64 off) return page; } +static int gather_extent_pages(struct inode *inode, struct page **pages, + int num_pages, u64 off) +{ + int i; + pgoff_t index = off >> PAGE_CACHE_SHIFT; + + for (i = 0; i < num_pages; i++) { + pages[i] = extent_same_get_page(inode, index + i); + if (!pages[i]) + return -ENOMEM; + } + return 0; +} + static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) { /* do any pending delalloc/csum calc on src, one way or @@ -2818,52 +2829,120 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) } } -static void btrfs_double_unlock(struct inode *inode1, u64 loff1, - struct inode *inode2, u64 loff2, u64 len) +static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2) { - unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); - unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); - mutex_unlock(&inode1->i_mutex); mutex_unlock(&inode2->i_mutex); } -static void btrfs_double_lock(struct inode *inode1, u64 loff1, - struct inode *inode2, u64 loff2, u64 len) +static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2) +{ + if (inode1 < inode2) + swap(inode1, inode2); + + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); + if (inode1 != inode2) + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); +} + +static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len) +{ + unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); + unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); +} + +static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1, + struct inode *inode2, u64 loff2, u64 len) { if (inode1 < inode2) { swap(inode1, inode2); swap(loff1, loff2); } - - mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); lock_extent_range(inode1, loff1, len); - if (inode1 != inode2) { - mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); + if (inode1 != inode2) lock_extent_range(inode2, loff2, len); +} + +struct cmp_pages { + int num_pages; + struct page **src_pages; + struct page **dst_pages; +}; + +static void btrfs_cmp_data_free(struct cmp_pages *cmp) +{ + int i; + struct page *pg; + + for (i = 0; i < cmp->num_pages; i++) { + pg = cmp->src_pages[i]; + if (pg) + page_cache_release(pg); + pg = cmp->dst_pages[i]; + if (pg) + page_cache_release(pg); + } + kfree(cmp->src_pages); + kfree(cmp->dst_pages); +} + +static int btrfs_cmp_data_prepare(struct inode *src, u64 loff, + struct inode *dst, u64 dst_loff, + u64 len, struct cmp_pages *cmp) +{ + int ret; + int num_pages = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT; + struct page **src_pgarr, **dst_pgarr; + + /* + * We must gather up all the pages before we initiate our + * extent locking. We use an array for the page pointers. Size + * of the array is bounded by len, which is in turn bounded by + * BTRFS_MAX_DEDUPE_LEN. + */ + src_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); + dst_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS); + if (!src_pgarr || !dst_pgarr) { + kfree(src_pgarr); + kfree(dst_pgarr); + return -ENOMEM; } + cmp->num_pages = num_pages; + cmp->src_pages = src_pgarr; + cmp->dst_pages = dst_pgarr; + + ret = gather_extent_pages(src, cmp->src_pages, cmp->num_pages, loff); + if (ret) + goto out; + + ret = gather_extent_pages(dst, cmp->dst_pages, cmp->num_pages, dst_loff); + +out: + if (ret) + btrfs_cmp_data_free(cmp); + return 0; } static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, - u64 dst_loff, u64 len) + u64 dst_loff, u64 len, struct cmp_pages *cmp) { int ret = 0; + int i; struct page *src_page, *dst_page; unsigned int cmp_len = PAGE_CACHE_SIZE; void *addr, *dst_addr; + i = 0; while (len) { if (len < PAGE_CACHE_SIZE) cmp_len = len; - src_page = extent_same_get_page(src, loff); - if (!src_page) - return -EINVAL; - dst_page = extent_same_get_page(dst, dst_loff); - if (!dst_page) { - page_cache_release(src_page); - return -EINVAL; - } + BUG_ON(i >= cmp->num_pages); + + src_page = cmp->src_pages[i]; + dst_page = cmp->dst_pages[i]; + addr = kmap_atomic(src_page); dst_addr = kmap_atomic(dst_page); @@ -2875,15 +2954,12 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, kunmap_atomic(addr); kunmap_atomic(dst_addr); - page_cache_release(src_page); - page_cache_release(dst_page); if (ret) break; - loff += cmp_len; - dst_loff += cmp_len; len -= cmp_len; + i++; } return ret; @@ -2914,6 +2990,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, { int ret; u64 len = olen; + struct cmp_pages cmp; /* * btrfs_clone() can't handle extents in the same file @@ -2926,7 +3003,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, if (len == 0) return 0; - btrfs_double_lock(src, loff, dst, dst_loff, len); + btrfs_double_inode_lock(src, dst); ret = extent_same_check_offsets(src, loff, &len, olen); if (ret) @@ -2943,13 +3020,22 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, goto out_unlock; } + ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp); + if (ret) + goto out_unlock; + + btrfs_double_extent_lock(src, loff, dst, dst_loff, len); + /* pass original length for comparison so we stay within i_size */ - ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen); + ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); if (ret == 0) ret = btrfs_clone(src, dst, loff, olen, len, dst_loff); + btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); + + btrfs_cmp_data_free(&cmp); out_unlock: - btrfs_double_unlock(src, loff, dst, dst_loff, len); + btrfs_double_inode_unlock(src, dst); return ret; } -- cgit From 0efa9f48c7e6c15e75946dd2b1c82d3d19e13545 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 30 Jun 2015 14:42:07 -0700 Subject: btrfs: allow dedupe of same inode clone() supports cloning within an inode so extent-same can do the same now. This patch fixes up the locking in extent-same to know about the single-inode case. In addition to that, we add a check for overlapping ranges, which clone does not allow. Signed-off-by: Mark Fasheh Reviewed-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 76 ++++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 60 insertions(+), 16 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 9ebe2dd31f2a..af064946c9b2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2991,27 +2991,61 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, int ret; u64 len = olen; struct cmp_pages cmp; + int same_inode = 0; + u64 same_lock_start = 0; + u64 same_lock_len = 0; - /* - * btrfs_clone() can't handle extents in the same file - * yet. Once that works, we can drop this check and replace it - * with a check for the same inode, but overlapping extents. - */ if (src == dst) - return -EINVAL; + same_inode = 1; if (len == 0) return 0; - btrfs_double_inode_lock(src, dst); + if (same_inode) { + mutex_lock(&src->i_mutex); - ret = extent_same_check_offsets(src, loff, &len, olen); - if (ret) - goto out_unlock; + ret = extent_same_check_offsets(src, loff, &len, olen); + if (ret) + goto out_unlock; - ret = extent_same_check_offsets(dst, dst_loff, &len, olen); - if (ret) - goto out_unlock; + /* + * Single inode case wants the same checks, except we + * don't want our length pushed out past i_size as + * comparing that data range makes no sense. + * + * extent_same_check_offsets() will do this for an + * unaligned length at i_size, so catch it here and + * reject the request. + * + * This effectively means we require aligned extents + * for the single-inode case, whereas the other cases + * allow an unaligned length so long as it ends at + * i_size. + */ + if (len != olen) { + ret = -EINVAL; + goto out_unlock; + } + + /* Check for overlapping ranges */ + if (dst_loff + len > loff && dst_loff < loff + len) { + ret = -EINVAL; + goto out_unlock; + } + + same_lock_start = min_t(u64, loff, dst_loff); + same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start; + } else { + btrfs_double_inode_lock(src, dst); + + ret = extent_same_check_offsets(src, loff, &len, olen); + if (ret) + goto out_unlock; + + ret = extent_same_check_offsets(dst, dst_loff, &len, olen); + if (ret) + goto out_unlock; + } /* don't make the dst file partly checksummed */ if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != @@ -3024,18 +3058,28 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, if (ret) goto out_unlock; - btrfs_double_extent_lock(src, loff, dst, dst_loff, len); + if (same_inode) + lock_extent_range(src, same_lock_start, same_lock_len); + else + btrfs_double_extent_lock(src, loff, dst, dst_loff, len); /* pass original length for comparison so we stay within i_size */ ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); if (ret == 0) ret = btrfs_clone(src, dst, loff, olen, len, dst_loff); - btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); + if (same_inode) + unlock_extent(&BTRFS_I(src)->io_tree, same_lock_start, + same_lock_start + same_lock_len - 1); + else + btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); btrfs_cmp_data_free(&cmp); out_unlock: - btrfs_double_inode_unlock(src, dst); + if (same_inode) + mutex_unlock(&src->i_mutex); + else + btrfs_double_inode_unlock(src, dst); return ret; } -- cgit From 1c919a5e13702caffbe2d2c7c305f9d0d2925160 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 30 Jun 2015 14:42:08 -0700 Subject: btrfs: don't update mtime/ctime on deduped inodes One issue users have reported is that dedupe changes mtime on files, resulting in tools like rsync thinking that their contents have changed when in fact the data is exactly the same. We also skip the ctime update as no user-visible metadata changes here and we want dedupe to be transparent to the user. Clone still wants time changes, so we special case this in the code. This was tested with the btrfs-extent-same tool. Signed-off-by: Mark Fasheh Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index af064946c9b2..5d91776e12a2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -87,7 +87,8 @@ struct btrfs_ioctl_received_subvol_args_32 { static int btrfs_clone(struct inode *src, struct inode *inode, - u64 off, u64 olen, u64 olen_aligned, u64 destoff); + u64 off, u64 olen, u64 olen_aligned, u64 destoff, + int no_time_update); /* Mask out flags that are inappropriate for the given type of inode. */ static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) @@ -3066,7 +3067,7 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, /* pass original length for comparison so we stay within i_size */ ret = btrfs_cmp_data(src, loff, dst, dst_loff, olen, &cmp); if (ret == 0) - ret = btrfs_clone(src, dst, loff, olen, len, dst_loff); + ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1); if (same_inode) unlock_extent(&BTRFS_I(src)->io_tree, same_lock_start, @@ -3231,13 +3232,15 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans, struct inode *inode, u64 endoff, const u64 destoff, - const u64 olen) + const u64 olen, + int no_time_update) { struct btrfs_root *root = BTRFS_I(inode)->root; int ret; inode_inc_iversion(inode); - inode->i_mtime = inode->i_ctime = CURRENT_TIME; + if (!no_time_update) + inode->i_mtime = inode->i_ctime = CURRENT_TIME; /* * We round up to the block size at eof when determining which * extents to clone above, but shouldn't round up the file size. @@ -3322,13 +3325,13 @@ static void clone_update_extent_map(struct inode *inode, * @inode: Inode to clone to * @off: Offset within source to start clone from * @olen: Original length, passed by user, of range to clone - * @olen_aligned: Block-aligned value of olen, extent_same uses - * identical values here + * @olen_aligned: Block-aligned value of olen * @destoff: Offset within @inode to start clone + * @no_time_update: Whether to update mtime/ctime on the target inode */ static int btrfs_clone(struct inode *src, struct inode *inode, const u64 off, const u64 olen, const u64 olen_aligned, - const u64 destoff) + const u64 destoff, int no_time_update) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_path *path = NULL; @@ -3652,7 +3655,8 @@ process_slot: root->sectorsize); ret = clone_finish_inode_update(trans, inode, last_dest_end, - destoff, olen); + destoff, olen, + no_time_update); if (ret) goto out; if (new_key.offset + datal >= destoff + len) @@ -3690,7 +3694,7 @@ process_slot: clone_update_extent_map(inode, trans, NULL, last_dest_end, destoff + len - last_dest_end); ret = clone_finish_inode_update(trans, inode, destoff + len, - destoff, olen); + destoff, olen, no_time_update); } out: @@ -3827,7 +3831,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, lock_extent_range(inode, destoff, len); } - ret = btrfs_clone(src, inode, off, olen, len, destoff); + ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); if (same_inode) { u64 lock_start = min_t(u64, off, destoff); -- cgit From 61de718fceb6bc028dafe4d06a1f87a9e0998303 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 1 Jul 2015 12:13:10 +0100 Subject: Btrfs: fix memory corruption on failure to submit bio for direct IO If we fail to submit a bio for a direct IO request, we were grabbing the corresponding ordered extent and decrementing its reference count twice, once for our lookup reference and once for the ordered tree reference. This was a problem because it caused the ordered extent to be freed without removing it from the ordered tree and any lists it might be attached to, leaving dangling pointers to the ordered extent around. Example trace with CONFIG_DEBUG_PAGEALLOC=y: [161779.858707] BUG: unable to handle kernel paging request at 0000000087654330 [161779.859983] IP: [] rb_prev+0x22/0x3b [161779.860636] PGD 34d818067 PUD 0 [161779.860636] Oops: 0000 [#1] PREEMPT SMP DEBUG_PAGEALLOC (...) [161779.860636] Call Trace: [161779.860636] [] __tree_search+0xd9/0xf9 [btrfs] [161779.860636] [] tree_search+0x42/0x63 [btrfs] [161779.860636] [] ? btrfs_lookup_ordered_range+0x2d/0xa5 [btrfs] [161779.860636] [] btrfs_lookup_ordered_range+0x38/0xa5 [btrfs] [161779.860636] [] btrfs_get_blocks_direct+0x11b/0x615 [btrfs] [161779.860636] [] do_blockdev_direct_IO+0x5ff/0xb43 [161779.860636] [] ? btrfs_page_exists_in_range+0x1ad/0x1ad [btrfs] [161779.860636] [] ? btrfs_get_extent_fiemap+0x1bc/0x1bc [btrfs] [161779.860636] [] __blockdev_direct_IO+0x32/0x34 [161779.860636] [] ? btrfs_get_extent_fiemap+0x1bc/0x1bc [btrfs] [161779.860636] [] btrfs_direct_IO+0x198/0x21f [btrfs] [161779.860636] [] ? btrfs_get_extent_fiemap+0x1bc/0x1bc [btrfs] [161779.860636] [] generic_file_direct_write+0xb3/0x128 [161779.860636] [] ? btrfs_file_write_iter+0x15f/0x3e0 [btrfs] [161779.860636] [] btrfs_file_write_iter+0x201/0x3e0 [btrfs] (...) We were also not freeing the btrfs_dio_private we allocated previously, which kmemleak reported with the following trace in its sysfs file: unreferenced object 0xffff8803f553bf80 (size 96): comm "xfs_io", pid 4501, jiffies 4295039588 (age 173.936s) hex dump (first 32 bytes): 88 6c 9b f5 02 88 ff ff 00 00 00 00 00 00 00 00 .l.............. 00 00 00 00 00 00 00 00 00 00 c4 00 00 00 00 00 ................ backtrace: [] create_object+0x172/0x29a [] kmemleak_alloc+0x25/0x41 [] kmemleak_alloc_recursive.constprop.40+0x16/0x18 [] kmem_cache_alloc_trace+0xfb/0x148 [] btrfs_submit_direct+0x65/0x16a [btrfs] [] dio_bio_submit+0x62/0x8f [] do_blockdev_direct_IO+0x97e/0xb43 [] __blockdev_direct_IO+0x32/0x34 [] btrfs_direct_IO+0x198/0x21f [btrfs] [] generic_file_direct_write+0xb3/0x128 [] btrfs_file_write_iter+0x201/0x3e0 [btrfs] [] __vfs_write+0x7c/0xa5 [] vfs_write+0xa0/0xe4 [] SyS_pwrite64+0x64/0x82 [] system_call_fastpath+0x12/0x6f [] 0xffffffffffffffff For read requests we weren't doing any cleanup either (none of the work done by btrfs_endio_direct_read()), so a failure submitting a bio for a read request would leave a range in the inode's io_tree locked forever, blocking any future operations (both reads and writes) against that range. So fix this by making sure we do the same cleanup that we do for the case where the bio submission succeeds. Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 65 +++++++++++++++++++++++++++++++++++-------------- fs/btrfs/ordered-data.c | 5 ++++ 2 files changed, 52 insertions(+), 18 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 855935f6671a..c0b2b6b51b2a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8163,9 +8163,8 @@ out_err: static void btrfs_submit_direct(int rw, struct bio *dio_bio, struct inode *inode, loff_t file_offset) { - struct btrfs_root *root = BTRFS_I(inode)->root; - struct btrfs_dio_private *dip; - struct bio *io_bio; + struct btrfs_dio_private *dip = NULL; + struct bio *io_bio = NULL; struct btrfs_io_bio *btrfs_bio; int skip_sum; int write = rw & REQ_WRITE; @@ -8182,7 +8181,7 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, dip = kzalloc(sizeof(*dip), GFP_NOFS); if (!dip) { ret = -ENOMEM; - goto free_io_bio; + goto free_ordered; } dip->private = dio_bio->bi_private; @@ -8210,25 +8209,55 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio, if (btrfs_bio->end_io) btrfs_bio->end_io(btrfs_bio, ret); -free_io_bio: - bio_put(io_bio); free_ordered: /* - * If this is a write, we need to clean up the reserved space and kill - * the ordered extent. + * If we arrived here it means either we failed to submit the dip + * or we either failed to clone the dio_bio or failed to allocate the + * dip. If we cloned the dio_bio and allocated the dip, we can just + * call bio_endio against our io_bio so that we get proper resource + * cleanup if we fail to submit the dip, otherwise, we must do the + * same as btrfs_endio_direct_[write|read] because we can't call these + * callbacks - they require an allocated dip and a clone of dio_bio. */ - if (write) { - struct btrfs_ordered_extent *ordered; - ordered = btrfs_lookup_ordered_extent(inode, file_offset); - if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && - !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) - btrfs_free_reserved_extent(root, ordered->start, - ordered->disk_len, 1); - btrfs_put_ordered_extent(ordered); - btrfs_put_ordered_extent(ordered); + if (io_bio && dip) { + bio_endio(io_bio, ret); + /* + * The end io callbacks free our dip, do the final put on io_bio + * and all the cleanup and final put for dio_bio (through + * dio_end_io()). + */ + dip = NULL; + io_bio = NULL; + } else { + if (write) { + struct btrfs_ordered_extent *ordered; + + ordered = btrfs_lookup_ordered_extent(inode, + file_offset); + set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); + /* + * Decrements our ref on the ordered extent and removes + * the ordered extent from the inode's ordered tree, + * doing all the proper resource cleanup such as for the + * reserved space and waking up any waiters for this + * ordered extent (through btrfs_remove_ordered_extent). + */ + btrfs_finish_ordered_io(ordered); + } else { + unlock_extent(&BTRFS_I(inode)->io_tree, file_offset, + file_offset + dio_bio->bi_iter.bi_size - 1); + } + clear_bit(BIO_UPTODATE, &dio_bio->bi_flags); + /* + * Releases and cleans up our dio_bio, no need to bio_put() + * nor bio_endio()/bio_io_error() against dio_bio. + */ + dio_end_io(dio_bio, ret); } - bio_endio(dio_bio, ret); + if (io_bio) + bio_put(io_bio); + kfree(dip); } static ssize_t check_direct_IO(struct btrfs_root *root, struct kiocb *iocb, diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 89656d799ff6..52170cf1757e 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -552,6 +552,10 @@ void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry) trace_btrfs_ordered_extent_put(entry->inode, entry); if (atomic_dec_and_test(&entry->refs)) { + ASSERT(list_empty(&entry->log_list)); + ASSERT(list_empty(&entry->trans_list)); + ASSERT(list_empty(&entry->root_extent_list)); + ASSERT(RB_EMPTY_NODE(&entry->rb_node)); if (entry->inode) btrfs_add_delayed_iput(entry->inode); while (!list_empty(&entry->list)) { @@ -579,6 +583,7 @@ void btrfs_remove_ordered_extent(struct inode *inode, spin_lock_irq(&tree->lock); node = &entry->rb_node; rb_erase(node, &tree->tree); + RB_CLEAR_NODE(node); if (tree->last == node) tree->last = NULL; set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); -- cgit From 9c6429d96daec64f6b5b10a1c6b02c7264541ea1 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Wed, 10 Jun 2015 12:55:41 +0100 Subject: Btrfs: fix a comment in inode.c:evict_inode_truncate_pages() The comment was not correct about the part where it says the endio callback of the bio might have not yet been called - update it to mention that by that time the endio callback execution might still be in progress only. Signed-off-by: Filipe Manana Reviewed-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c0b2b6b51b2a..53afda0ef4e3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4989,8 +4989,9 @@ static void evict_inode_truncate_pages(struct inode *inode) /* * Keep looping until we have no more ranges in the io tree. * We can have ongoing bios started by readpages (called from readahead) - * that didn't get their end io callbacks called yet or they are still - * in progress ((extent_io.c:end_bio_extent_readpage()). This means some + * that have their endio callback (extent_io.c:end_bio_extent_readpage) + * still in progress (unlocked the pages in the bio but did not yet + * unlocked the ranges in the io tree). Therefore this means some * ranges can still be locked and eviction started because before * submitting those bios, which are executed by a separate task (work * queue kthread), inode references (inode->i_count) were not taken -- cgit From ad9ee2053f3f2babebc09ebc4970daa66c56c7ee Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 17 Jun 2015 16:59:57 +0800 Subject: Btrfs: fix hang when failing to submit bio of directIO The hang is uncoverd by generic/019. btrfs_endio_direct_write() skips the "finish_ordered_fn" part when it hits an error, thus those added ordered extents will never get processed, which block processes that waiting for them via btrfs_start_ordered_extent(). This fixes the above, and meanwhile finish_ordered_fn will do the space accounting work. Signed-off-by: Liu Bo Reviewed-by: Filipe Manana Tested-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 53afda0ef4e3..0b9fb81ccf8a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7872,8 +7872,6 @@ static void btrfs_endio_direct_write(struct bio *bio, int err) struct bio *dio_bio; int ret; - if (err) - goto out_done; again: ret = btrfs_dec_test_first_ordered_pending(inode, &ordered, &ordered_offset, @@ -7896,7 +7894,6 @@ out_test: ordered = NULL; goto again; } -out_done: dio_bio = dip->dio_bio; kfree(dip); -- cgit From ddba1bfc2369cd0566bcfdab47599834a32d1c19 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 17 Jun 2015 16:59:58 +0800 Subject: Btrfs: fix warning of bytes_may_use While running generic/019, dmesg got several warnings from btrfs_free_reserved_data_space(). Test generic/019 produces some disk failures so sumbit dio will get errors, in which case, btrfs_direct_IO() goes to the error handling and free bytes_may_use, but the problem is that bytes_may_use has been free'd during get_block(). This adds a runtime flag to show if we've gone through get_block(), if so, don't do the cleanup work. Signed-off-by: Liu Bo Reviewed-by: Filipe Manana Tested-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 2 ++ fs/btrfs/inode.c | 16 +++++++++++++--- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 0ef5cc13fae2..81220b2203c6 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -44,6 +44,8 @@ #define BTRFS_INODE_IN_DELALLOC_LIST 9 #define BTRFS_INODE_READDIO_NEED_LOCK 10 #define BTRFS_INODE_HAS_PROPS 11 +/* DIO is ready to submit */ +#define BTRFS_INODE_DIO_READY 12 /* * The following 3 bits are meant only for the btree inode. * When any of them is set, it means an error happened while writing an diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0b9fb81ccf8a..b33c0cf02668 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7547,6 +7547,7 @@ unlock: current->journal_info = outstanding_extents; btrfs_free_reserved_data_space(inode, len); + set_bit(BTRFS_INODE_DIO_READY, &BTRFS_I(inode)->runtime_flags); } /* @@ -8357,9 +8358,18 @@ static ssize_t btrfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter, btrfs_submit_direct, flags); if (iov_iter_rw(iter) == WRITE) { current->journal_info = NULL; - if (ret < 0 && ret != -EIOCBQUEUED) - btrfs_delalloc_release_space(inode, count); - else if (ret >= 0 && (size_t)ret < count) + if (ret < 0 && ret != -EIOCBQUEUED) { + /* + * If the error comes from submitting stage, + * btrfs_get_blocsk_direct() has free'd data space, + * and metadata space will be handled by + * finish_ordered_fn, don't do that again to make + * sure bytes_may_use is correct. + */ + if (!test_and_clear_bit(BTRFS_INODE_DIO_READY, + &BTRFS_I(inode)->runtime_flags)) + btrfs_delalloc_release_space(inode, count); + } else if (ret >= 0 && (size_t)ret < count) btrfs_delalloc_release_space(inode, count - (size_t)ret); } -- cgit From 9689457b5b0a2b69874c421a489d3fb50ca76b7b Mon Sep 17 00:00:00 2001 From: Shilong Wang Date: Sun, 12 Apr 2015 14:35:20 +0800 Subject: Btrfs: fix wrong check for btrfs_force_chunk_alloc() btrfs_force_chunk_alloc() return 1 for allocation chunk successfully. This problem exists since commit c87f08ca4. With this patch, we might fix some enospc problems for balances. Signed-off-by: Wang Shilong Reviewed-by: Filipe Manana Tested-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/relocation.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 827951fbf7fc..88cbb5995667 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -4049,7 +4049,7 @@ restart: if (trans && progress && err == -ENOSPC) { ret = btrfs_force_chunk_alloc(trans, rc->extent_root, rc->block_group->flags); - if (ret == 0) { + if (ret == 1) { err = 0; progress = 0; goto restart; -- cgit From 0f0ff9a9f3fa2ec6f427603fd521d5f3a0b076d1 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 1 Jul 2015 23:37:46 -0400 Subject: ext4: fix fencepost error in lazytime optimization Commit 8f4d8558391: "ext4: fix lazytime optimization" was not a complete fix. In the case where the inode number is a multiple of 16, and we could still end up updating an inode with dirty timestamps written to the wrong inode on disk. Oops. This can be easily reproduced by using generic/005 with a file system with metadata_csum and lazytime enabled. Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/inode.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index e057c6fcc227..4ad73d3c1003 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4348,7 +4348,12 @@ static void ext4_update_other_inodes_time(struct super_block *sb, int inode_size = EXT4_INODE_SIZE(sb); oi.orig_ino = orig_ino; - ino = (orig_ino & ~(inodes_per_block - 1)) + 1; + /* + * Calculate the first inode in the inode table block. Inode + * numbers are one-based. That is, the first inode in a block + * (assuming 4k blocks and 256 byte inodes) is (n*16 + 1). + */ + ino = ((orig_ino - 1) & ~(inodes_per_block - 1)) + 1; for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) { if (ino == orig_ino) continue; -- cgit From bd7ade3cd9b0850264306f5c2b79024a417b6396 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 2 Jul 2015 01:32:44 -0400 Subject: bufferhead: Add _gfp version for sb_getblk() sb_getblk() is used during ext4 (and possibly other FSes) writeback paths. Sometimes such path require allocating memory and guaranteeing that such allocation won't block. Currently, however, there is no way to provide user flags for sb_getblk which could lead to deadlocks. This patch implements a sb_getblk_gfp with the only difference it can accept user-provided GFP flags. Signed-off-by: Nikolay Borisov Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- include/linux/buffer_head.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 73b45225a7ca..e6797ded700e 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -317,6 +317,13 @@ sb_getblk(struct super_block *sb, sector_t block) return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); } + +static inline struct buffer_head * +sb_getblk_gfp(struct super_block *sb, sector_t block, gfp_t gfp) +{ + return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, gfp); +} + static inline struct buffer_head * sb_find_get_block(struct super_block *sb, sector_t block) { -- cgit From c45653c341f5c8a0ce19c8f0ad4678640849cb86 Mon Sep 17 00:00:00 2001 From: Nikolay Borisov Date: Thu, 2 Jul 2015 01:34:07 -0400 Subject: ext4: avoid deadlocks in the writeback path by using sb_getblk_gfp Switch ext4 to using sb_getblk_gfp with GFP_NOFS added to fix possible deadlocks in the page writeback path. Signed-off-by: Nikolay Borisov Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/extents.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index d86d2622f826..69af30e0d75d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -503,7 +503,7 @@ __read_extent_tree_block(const char *function, unsigned int line, struct buffer_head *bh; int err; - bh = sb_getblk(inode->i_sb, pblk); + bh = sb_getblk_gfp(inode->i_sb, pblk, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) return ERR_PTR(-ENOMEM); @@ -1088,7 +1088,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, err = -EIO; goto cleanup; } - bh = sb_getblk(inode->i_sb, newblock); + bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) { err = -ENOMEM; goto cleanup; @@ -1282,7 +1282,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, if (newblock == 0) return err; - bh = sb_getblk(inode->i_sb, newblock); + bh = sb_getblk_gfp(inode->i_sb, newblock, __GFP_MOVABLE | GFP_NOFS); if (unlikely(!bh)) return -ENOMEM; lock_buffer(bh); -- cgit From c423bc85097327fba71f9a831280d09b64bb62b6 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 16 Jun 2015 14:54:51 +0300 Subject: drm/omap: check that plane is inside crtc DRM allows planes to be partially off-screen, but DSS hardware does not. This patch adds the necessary check to reject plane configs if the plane is not fully inside the crtc. Signed-off-by: Tomi Valkeinen Acked-by: Laurent Pinchart --- drivers/gpu/drm/omapdrm/omap_plane.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/drivers/gpu/drm/omapdrm/omap_plane.c b/drivers/gpu/drm/omapdrm/omap_plane.c index cfa8276c4deb..098904696a5c 100644 --- a/drivers/gpu/drm/omapdrm/omap_plane.c +++ b/drivers/gpu/drm/omapdrm/omap_plane.c @@ -17,6 +17,7 @@ * this program. If not, see . */ +#include #include #include @@ -153,9 +154,34 @@ static void omap_plane_atomic_disable(struct drm_plane *plane, dispc_ovl_enable(omap_plane->id, false); } +static int omap_plane_atomic_check(struct drm_plane *plane, + struct drm_plane_state *state) +{ + struct drm_crtc_state *crtc_state; + + if (!state->crtc) + return 0; + + crtc_state = drm_atomic_get_crtc_state(state->state, state->crtc); + if (IS_ERR(crtc_state)) + return PTR_ERR(crtc_state); + + if (state->crtc_x < 0 || state->crtc_y < 0) + return -EINVAL; + + if (state->crtc_x + state->crtc_w > crtc_state->adjusted_mode.hdisplay) + return -EINVAL; + + if (state->crtc_y + state->crtc_h > crtc_state->adjusted_mode.vdisplay) + return -EINVAL; + + return 0; +} + static const struct drm_plane_helper_funcs omap_plane_helper_funcs = { .prepare_fb = omap_plane_prepare_fb, .cleanup_fb = omap_plane_cleanup_fb, + .atomic_check = omap_plane_atomic_check, .atomic_update = omap_plane_atomic_update, .atomic_disable = omap_plane_atomic_disable, }; -- cgit From 96cbd142310cc7281f94739b27d8e4c308d588c5 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 28 Apr 2015 14:01:32 +0300 Subject: drm/omap: increase DMM transaction timeout The DMM driver uses a timeout of 1 ms to wait for DMM transaction to finish. While DMM should always finish the operation within that time, the timeout is rather strict. Small misbehavior of the system (e.g. an irq taking too long) could trigger the timeout. As the DMM is a critical piece of code for display memory management, let's increase the timeout to 100 ms so that we are less likely to fail a memory allocation in case of system misbehaviors. 100 ms is just a guess of a reasonably large timeout. The HW should accomplish the task in less than 1 ms. Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/omap_dmm_tiler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c index f2daad8c3d96..7841970de48d 100644 --- a/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c +++ b/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c @@ -285,7 +285,7 @@ static int dmm_txn_commit(struct dmm_txn *txn, bool wait) if (wait) { if (!wait_for_completion_timeout(&engine->compl, - msecs_to_jiffies(1))) { + msecs_to_jiffies(100))) { dev_err(dmm->dev, "timed out waiting for done\n"); ret = -ETIMEDOUT; } -- cgit From 9c368506c96793f17934a61a0f06412afb7d2f8d Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 28 Apr 2015 14:01:35 +0300 Subject: drm/omap: fix omap_framebuffer_unpin() error handling omap_framebuffer_unpin() check the return value of omap_gem_put_paddr() and return immediately if omap_gem_put_paddr() fails. This patch removes the check for the return value, and also removes the return value of omap_framebuffer_unpin(), because: * Nothing checks the return value of omap_framebuffer_unpin(), and even something did check it, there's nothing the caller can do to handle the error. * If a omap_gem_put_paddr() fails, the framebuffer's other planes will be left unreleased. So it's better to call omap_gem_put_paddr() for all the planes, even if one would fail. Signed-off-by: Tomi Valkeinen Acked-by: Laurent Pinchart --- drivers/gpu/drm/omapdrm/omap_drv.h | 2 +- drivers/gpu/drm/omapdrm/omap_fb.c | 16 ++++------------ 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h index ae2df41f216f..2ef89c0c3006 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.h +++ b/drivers/gpu/drm/omapdrm/omap_drv.h @@ -177,7 +177,7 @@ struct drm_framebuffer *omap_framebuffer_init(struct drm_device *dev, struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object **bos); struct drm_gem_object *omap_framebuffer_bo(struct drm_framebuffer *fb, int p); int omap_framebuffer_pin(struct drm_framebuffer *fb); -int omap_framebuffer_unpin(struct drm_framebuffer *fb); +void omap_framebuffer_unpin(struct drm_framebuffer *fb); void omap_framebuffer_update_scanout(struct drm_framebuffer *fb, struct omap_drm_window *win, struct omap_overlay_info *info); struct drm_connector *omap_framebuffer_get_next_connector( diff --git a/drivers/gpu/drm/omapdrm/omap_fb.c b/drivers/gpu/drm/omapdrm/omap_fb.c index 0b967e76df1a..51b1219af87f 100644 --- a/drivers/gpu/drm/omapdrm/omap_fb.c +++ b/drivers/gpu/drm/omapdrm/omap_fb.c @@ -287,10 +287,10 @@ fail: } /* unpin, no longer being scanned out: */ -int omap_framebuffer_unpin(struct drm_framebuffer *fb) +void omap_framebuffer_unpin(struct drm_framebuffer *fb) { struct omap_framebuffer *omap_fb = to_omap_framebuffer(fb); - int ret, i, n = drm_format_num_planes(fb->pixel_format); + int i, n = drm_format_num_planes(fb->pixel_format); mutex_lock(&omap_fb->lock); @@ -298,24 +298,16 @@ int omap_framebuffer_unpin(struct drm_framebuffer *fb) if (omap_fb->pin_count > 0) { mutex_unlock(&omap_fb->lock); - return 0; + return; } for (i = 0; i < n; i++) { struct plane *plane = &omap_fb->planes[i]; - ret = omap_gem_put_paddr(plane->bo); - if (ret) - goto fail; + omap_gem_put_paddr(plane->bo); plane->paddr = 0; } mutex_unlock(&omap_fb->lock); - - return 0; - -fail: - mutex_unlock(&omap_fb->lock); - return ret; } struct drm_gem_object *omap_framebuffer_bo(struct drm_framebuffer *fb, int p) -- cgit From 393a949f51994a67e8e33b2f79c6f2020959a7e2 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 28 Apr 2015 14:01:36 +0300 Subject: drm/omap: fix omap_gem_put_paddr() error handling If tiler_unpin() call in omap_gem_put_paddr() fails, omap_gem_put_paddr() will immediately stop processing and return an error. This patch remoes that error checking, and also removes omap_gem_put_paddr()'s return value, because: * The caller of omap_gem_put_paddr() can do nothing if an error happens, so it's pointless to return an error value * If tiler_unpin() fails, the GEM object will possibly be left in an undefined state, where the DMM mapping may have been removed, but the GEM object still thinks everything is as it should be, leading to crashes later. * There's no point in returning an error from a "free" call, as the caller can do nothing about it. So it's better to clean up as much as possible. Signed-off-by: Tomi Valkeinen Acked-by: Laurent Pinchart --- drivers/gpu/drm/omapdrm/omap_drv.h | 2 +- drivers/gpu/drm/omapdrm/omap_gem.c | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h index 2ef89c0c3006..1ae8477e4289 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.h +++ b/drivers/gpu/drm/omapdrm/omap_drv.h @@ -211,7 +211,7 @@ void omap_gem_dma_sync(struct drm_gem_object *obj, enum dma_data_direction dir); int omap_gem_get_paddr(struct drm_gem_object *obj, dma_addr_t *paddr, bool remap); -int omap_gem_put_paddr(struct drm_gem_object *obj); +void omap_gem_put_paddr(struct drm_gem_object *obj); int omap_gem_get_pages(struct drm_gem_object *obj, struct page ***pages, bool remap); int omap_gem_put_pages(struct drm_gem_object *obj); diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c index 874eb6dcf94b..7ed08fdc4c42 100644 --- a/drivers/gpu/drm/omapdrm/omap_gem.c +++ b/drivers/gpu/drm/omapdrm/omap_gem.c @@ -808,10 +808,10 @@ fail: /* Release physical address, when DMA is no longer being performed.. this * could potentially unpin and unmap buffers from TILER */ -int omap_gem_put_paddr(struct drm_gem_object *obj) +void omap_gem_put_paddr(struct drm_gem_object *obj) { struct omap_gem_object *omap_obj = to_omap_bo(obj); - int ret = 0; + int ret; mutex_lock(&obj->dev->struct_mutex); if (omap_obj->paddr_cnt > 0) { @@ -821,7 +821,6 @@ int omap_gem_put_paddr(struct drm_gem_object *obj) if (ret) { dev_err(obj->dev->dev, "could not unpin pages: %d\n", ret); - goto fail; } ret = tiler_release(omap_obj->block); if (ret) { @@ -832,9 +831,8 @@ int omap_gem_put_paddr(struct drm_gem_object *obj) omap_obj->block = NULL; } } -fail: + mutex_unlock(&obj->dev->struct_mutex); - return ret; } /* Get rotated scanout address (only valid if already pinned), at the -- cgit From d642d3acd89454006fd6ca9cc4dd57f26959f9d1 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Fri, 8 May 2015 13:32:31 +0300 Subject: drm/omap: fix align_pitch() for 24 bits per pixel align_pitch() uses ALIGN() to ensure the pitch is aligned to SGX's requirement of 8 pixels. However, ALIGN() expects the alignment value to be a power of two, which is not the case for 24 bits per pixels. Use roundup() instead, which works for all alignments. This fixes the error seen with 24 bits per pixel modes: "buffer pitch (2176 bytes) is not a multiple of pixel size (3 bytes)" Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/omap_drv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h index 1ae8477e4289..12081e61d45a 100644 --- a/drivers/gpu/drm/omapdrm/omap_drv.h +++ b/drivers/gpu/drm/omapdrm/omap_drv.h @@ -236,7 +236,7 @@ static inline int align_pitch(int pitch, int width, int bpp) /* PVR needs alignment to 8 pixels.. right now that is the most * restrictive stride requirement.. */ - return ALIGN(pitch, 8 * bytespp); + return roundup(pitch, 8 * bytespp); } /* map crtc to vblank mask */ -- cgit From 743c16719f671c206923d23dae4ac57edfd9483c Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Tue, 1 Jul 2014 20:52:50 +0200 Subject: drm/omap: replace ALIGN(PAGE_SIZE) by PAGE_ALIGN use mm.h definition Cc: David Airlie Cc: Tomi Valkeinen Cc: dri-devel@lists.freedesktop.org Signed-off-by: Fabian Frederick Signed-off-by: Tomi Valkeinen --- drivers/gpu/drm/omapdrm/omap_fbdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/omapdrm/omap_fbdev.c b/drivers/gpu/drm/omapdrm/omap_fbdev.c index 23b5a84389e3..720d16bce7e8 100644 --- a/drivers/gpu/drm/omapdrm/omap_fbdev.c +++ b/drivers/gpu/drm/omapdrm/omap_fbdev.c @@ -135,7 +135,7 @@ static int omap_fbdev_create(struct drm_fb_helper *helper, fbdev->ywrap_enabled = priv->has_dmm && ywrap_enabled; if (fbdev->ywrap_enabled) { /* need to align pitch to page size if using DMM scrolling */ - mode_cmd.pitches[0] = ALIGN(mode_cmd.pitches[0], PAGE_SIZE); + mode_cmd.pitches[0] = PAGE_ALIGN(mode_cmd.pitches[0]); } /* allocate backing bo */ -- cgit From 45a481c2176f6acca2efb6477c6018b2c3e3c60f Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Mon, 29 Jun 2015 14:30:09 -0700 Subject: clk: iproc: fix memory leak from clock name of_property_read_string_index takes array of pointers and assign them to strings read from device tree property. No additional memory allocation is needed prior to calling of_property_read_string_index. In fact, since the array of pointers will be re-assigned to other strings, any memory that it points to prior to calling of_property_read_string_index will be leaked Reported-by: Dan Carpenter Signed-off-by: Ray Jui Fixes: 5fe225c105fd ("clk: iproc: add initial common clock support") Signed-off-by: Stephen Boyd --- drivers/clk/bcm/clk-iproc-asiu.c | 6 +----- drivers/clk/bcm/clk-iproc-pll.c | 8 +------- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/clk/bcm/clk-iproc-asiu.c b/drivers/clk/bcm/clk-iproc-asiu.c index e19c09cd9645..f630e1bbdcfe 100644 --- a/drivers/clk/bcm/clk-iproc-asiu.c +++ b/drivers/clk/bcm/clk-iproc-asiu.c @@ -222,10 +222,6 @@ void __init iproc_asiu_setup(struct device_node *node, struct iproc_asiu_clk *asiu_clk; const char *clk_name; - clk_name = kzalloc(IPROC_CLK_NAME_LEN, GFP_KERNEL); - if (WARN_ON(!clk_name)) - goto err_clk_register; - ret = of_property_read_string_index(node, "clock-output-names", i, &clk_name); if (WARN_ON(ret)) @@ -259,7 +255,7 @@ void __init iproc_asiu_setup(struct device_node *node, err_clk_register: for (i = 0; i < num_clks; i++) - kfree(asiu->clks[i].name); + clk_unregister(asiu->clk_data.clks[i]); iounmap(asiu->gate_base); err_iomap_gate: diff --git a/drivers/clk/bcm/clk-iproc-pll.c b/drivers/clk/bcm/clk-iproc-pll.c index 46fb84bc2674..a8d971b820b0 100644 --- a/drivers/clk/bcm/clk-iproc-pll.c +++ b/drivers/clk/bcm/clk-iproc-pll.c @@ -655,10 +655,6 @@ void __init iproc_pll_clk_setup(struct device_node *node, memset(&init, 0, sizeof(init)); parent_name = node->name; - clk_name = kzalloc(IPROC_CLK_NAME_LEN, GFP_KERNEL); - if (WARN_ON(!clk_name)) - goto err_clk_register; - ret = of_property_read_string_index(node, "clock-output-names", i, &clk_name); if (WARN_ON(ret)) @@ -690,10 +686,8 @@ void __init iproc_pll_clk_setup(struct device_node *node, return; err_clk_register: - for (i = 0; i < num_clks; i++) { - kfree(pll->clks[i].name); + for (i = 0; i < num_clks; i++) clk_unregister(pll->clk_data.clks[i]); - } err_pll_register: if (pll->asiu_base) -- cgit From 69916d96094e1e16567c5f25515a13ed2896c730 Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Mon, 29 Jun 2015 14:30:10 -0700 Subject: clk: iproc: fix bit manipulation arithmetic A 32-bit variable should be type casted to 64-bit before arithmetic operation and assigning it to a 64-bit variable Reported-by: Dan Carpenter Signed-off-by: Ray Jui Fixes: 5fe225c105fd ("clk: iproc: add initial common clock support") Signed-off-by: Stephen Boyd --- drivers/clk/bcm/clk-iproc-pll.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/clk/bcm/clk-iproc-pll.c b/drivers/clk/bcm/clk-iproc-pll.c index a8d971b820b0..2dda4e8295a9 100644 --- a/drivers/clk/bcm/clk-iproc-pll.c +++ b/drivers/clk/bcm/clk-iproc-pll.c @@ -366,7 +366,7 @@ static unsigned long iproc_pll_recalc_rate(struct clk_hw *hw, val = readl(pll->pll_base + ctrl->ndiv_int.offset); ndiv_int = (val >> ctrl->ndiv_int.shift) & bit_mask(ctrl->ndiv_int.width); - ndiv = ndiv_int << ctrl->ndiv_int.shift; + ndiv = (u64)ndiv_int << ctrl->ndiv_int.shift; if (ctrl->flags & IPROC_CLK_PLL_HAS_NDIV_FRAC) { val = readl(pll->pll_base + ctrl->ndiv_frac.offset); @@ -374,7 +374,8 @@ static unsigned long iproc_pll_recalc_rate(struct clk_hw *hw, bit_mask(ctrl->ndiv_frac.width); if (ndiv_frac != 0) - ndiv = (ndiv_int << ctrl->ndiv_int.shift) | ndiv_frac; + ndiv = ((u64)ndiv_int << ctrl->ndiv_int.shift) | + ndiv_frac; } val = readl(pll->pll_base + ctrl->pdiv.offset); -- cgit From 15ab38273d21a45487116ad4c428593427954848 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Sun, 28 Jun 2015 10:55:32 +0100 Subject: clk: stm32: Fix out-by-one error path in the index lookup If stm32f4_rcc_lookup() is called with primary == 0 and secondary == 192 then it will read beyond the end of the table array due to an out-by-one error in the range check. In addition to the fixing the inequality we also modify the r.h.s. to make it even more explicit that we are comparing against the size of table in bits. Reported-by: Dan Carpenter Signed-off-by: Daniel Thompson Acked-by: Maxime Coquelin Fixes: 358bdf892f6b ("clk: stm32: Add clock driver for STM32F4[23]xxx devices") Signed-off-by: Stephen Boyd --- drivers/clk/clk-stm32f4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/clk-stm32f4.c b/drivers/clk/clk-stm32f4.c index b9b12a742970..3f6f7ad39490 100644 --- a/drivers/clk/clk-stm32f4.c +++ b/drivers/clk/clk-stm32f4.c @@ -268,7 +268,7 @@ static int stm32f4_rcc_lookup_clk_idx(u8 primary, u8 secondary) memcpy(table, stm32f42xx_gate_map, sizeof(table)); /* only bits set in table can be used as indices */ - if (WARN_ON(secondary > 8 * sizeof(table) || + if (WARN_ON(secondary >= BITS_PER_BYTE * sizeof(table) || 0 == (table[BIT_ULL_WORD(secondary)] & BIT_ULL_MASK(secondary)))) return -EINVAL; -- cgit From c76a024e82bdb83a0f7d57e006f8e7f8ddf983e5 Mon Sep 17 00:00:00 2001 From: David Dueck Date: Fri, 26 Jun 2015 15:30:22 +0200 Subject: clk: at91: do not leak resources Do not leak memory and free irqs in case of an error. Acked-by: Boris Brezillon Signed-off-by: David Dueck Signed-off-by: Stephen Boyd --- drivers/clk/at91/clk-h32mx.c | 4 +++- drivers/clk/at91/clk-main.c | 4 +++- drivers/clk/at91/clk-master.c | 8 ++++++-- drivers/clk/at91/clk-pll.c | 8 ++++++-- drivers/clk/at91/clk-system.c | 8 ++++++-- drivers/clk/at91/clk-utmi.c | 8 ++++++-- 6 files changed, 30 insertions(+), 10 deletions(-) diff --git a/drivers/clk/at91/clk-h32mx.c b/drivers/clk/at91/clk-h32mx.c index 152dcb3f7b5f..61566bcefa53 100644 --- a/drivers/clk/at91/clk-h32mx.c +++ b/drivers/clk/at91/clk-h32mx.c @@ -116,8 +116,10 @@ void __init of_sama5d4_clk_h32mx_setup(struct device_node *np, h32mxclk->pmc = pmc; clk = clk_register(NULL, &h32mxclk->hw); - if (!clk) + if (!clk) { + kfree(h32mxclk); return; + } of_clk_add_provider(np, of_clk_src_simple_get, clk); } diff --git a/drivers/clk/at91/clk-main.c b/drivers/clk/at91/clk-main.c index c2400456a044..27dfa965cfed 100644 --- a/drivers/clk/at91/clk-main.c +++ b/drivers/clk/at91/clk-main.c @@ -171,8 +171,10 @@ at91_clk_register_main_osc(struct at91_pmc *pmc, irq_set_status_flags(osc->irq, IRQ_NOAUTOEN); ret = request_irq(osc->irq, clk_main_osc_irq_handler, IRQF_TRIGGER_HIGH, name, osc); - if (ret) + if (ret) { + kfree(osc); return ERR_PTR(ret); + } if (bypass) pmc_write(pmc, AT91_CKGR_MOR, diff --git a/drivers/clk/at91/clk-master.c b/drivers/clk/at91/clk-master.c index f98eafe9b12d..5b3ded5205a2 100644 --- a/drivers/clk/at91/clk-master.c +++ b/drivers/clk/at91/clk-master.c @@ -165,12 +165,16 @@ at91_clk_register_master(struct at91_pmc *pmc, unsigned int irq, irq_set_status_flags(master->irq, IRQ_NOAUTOEN); ret = request_irq(master->irq, clk_master_irq_handler, IRQF_TRIGGER_HIGH, "clk-master", master); - if (ret) + if (ret) { + kfree(master); return ERR_PTR(ret); + } clk = clk_register(NULL, &master->hw); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { + free_irq(master->irq, master); kfree(master); + } return clk; } diff --git a/drivers/clk/at91/clk-pll.c b/drivers/clk/at91/clk-pll.c index 6ec79dbc0840..23163be24b6f 100644 --- a/drivers/clk/at91/clk-pll.c +++ b/drivers/clk/at91/clk-pll.c @@ -338,12 +338,16 @@ at91_clk_register_pll(struct at91_pmc *pmc, unsigned int irq, const char *name, irq_set_status_flags(pll->irq, IRQ_NOAUTOEN); ret = request_irq(pll->irq, clk_pll_irq_handler, IRQF_TRIGGER_HIGH, id ? "clk-pllb" : "clk-plla", pll); - if (ret) + if (ret) { + kfree(pll); return ERR_PTR(ret); + } clk = clk_register(NULL, &pll->hw); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { + free_irq(pll->irq, pll); kfree(pll); + } return clk; } diff --git a/drivers/clk/at91/clk-system.c b/drivers/clk/at91/clk-system.c index a76d03fd577b..58008b3e8bc1 100644 --- a/drivers/clk/at91/clk-system.c +++ b/drivers/clk/at91/clk-system.c @@ -130,13 +130,17 @@ at91_clk_register_system(struct at91_pmc *pmc, const char *name, irq_set_status_flags(sys->irq, IRQ_NOAUTOEN); ret = request_irq(sys->irq, clk_system_irq_handler, IRQF_TRIGGER_HIGH, name, sys); - if (ret) + if (ret) { + kfree(sys); return ERR_PTR(ret); + } } clk = clk_register(NULL, &sys->hw); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { + free_irq(sys->irq, sys); kfree(sys); + } return clk; } diff --git a/drivers/clk/at91/clk-utmi.c b/drivers/clk/at91/clk-utmi.c index ae3263bc1476..30dd697b1668 100644 --- a/drivers/clk/at91/clk-utmi.c +++ b/drivers/clk/at91/clk-utmi.c @@ -118,12 +118,16 @@ at91_clk_register_utmi(struct at91_pmc *pmc, unsigned int irq, irq_set_status_flags(utmi->irq, IRQ_NOAUTOEN); ret = request_irq(utmi->irq, clk_utmi_irq_handler, IRQF_TRIGGER_HIGH, "clk-utmi", utmi); - if (ret) + if (ret) { + kfree(utmi); return ERR_PTR(ret); + } clk = clk_register(NULL, &utmi->hw); - if (IS_ERR(clk)) + if (IS_ERR(clk)) { + free_irq(utmi->irq, utmi); kfree(utmi); + } return clk; } -- cgit From 13ee9fdba96577eb1583dcd7b15767ef623fae12 Mon Sep 17 00:00:00 2001 From: Szabolcs Nagy Date: Wed, 1 Jul 2015 23:08:10 +0100 Subject: ARM: 8397/1: fix vdsomunge not to depend on glibc specific error.h If the host toolchain is not glibc based then the arm kernel build fails with arch/arm/vdso/vdsomunge.c:53:19: fatal error: error.h: No such file or directory error.h is a glibc only header (ie not available in musl, newlib and bsd libcs). Changed the error reporting to standard conforming code to avoid depending on specific C implementations. Signed-off-by: Szabolcs Nagy Acked-by: Will Deacon Fixes: 8512287a8165 ("ARM: 8330/1: add VDSO user-space code") Cc: stable@vger.kernel.org Signed-off-by: Nathan Lynch Signed-off-by: Russell King --- arch/arm/vdso/vdsomunge.c | 56 ++++++++++++++++++++++++++++------------------- 1 file changed, 33 insertions(+), 23 deletions(-) diff --git a/arch/arm/vdso/vdsomunge.c b/arch/arm/vdso/vdsomunge.c index 9005b07296c8..aedec81d1198 100644 --- a/arch/arm/vdso/vdsomunge.c +++ b/arch/arm/vdso/vdsomunge.c @@ -45,13 +45,11 @@ * it does. */ -#define _GNU_SOURCE - #include #include #include -#include #include +#include #include #include #include @@ -82,11 +80,25 @@ #define EF_ARM_ABI_FLOAT_HARD 0x400 #endif +static int failed; +static const char *argv0; static const char *outfile; +static void fail(const char *fmt, ...) +{ + va_list ap; + + failed = 1; + fprintf(stderr, "%s: ", argv0); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + static void cleanup(void) { - if (error_message_count > 0 && outfile != NULL) + if (failed && outfile != NULL) unlink(outfile); } @@ -119,68 +131,66 @@ int main(int argc, char **argv) int infd; atexit(cleanup); + argv0 = argv[0]; if (argc != 3) - error(EXIT_FAILURE, 0, "Usage: %s [infile] [outfile]", argv[0]); + fail("Usage: %s [infile] [outfile]\n", argv[0]); infile = argv[1]; outfile = argv[2]; infd = open(infile, O_RDONLY); if (infd < 0) - error(EXIT_FAILURE, errno, "Cannot open %s", infile); + fail("Cannot open %s: %s\n", infile, strerror(errno)); if (fstat(infd, &stat) != 0) - error(EXIT_FAILURE, errno, "Failed stat for %s", infile); + fail("Failed stat for %s: %s\n", infile, strerror(errno)); inbuf = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, infd, 0); if (inbuf == MAP_FAILED) - error(EXIT_FAILURE, errno, "Failed to map %s", infile); + fail("Failed to map %s: %s\n", infile, strerror(errno)); close(infd); inhdr = inbuf; if (memcmp(&inhdr->e_ident, ELFMAG, SELFMAG) != 0) - error(EXIT_FAILURE, 0, "Not an ELF file"); + fail("Not an ELF file\n"); if (inhdr->e_ident[EI_CLASS] != ELFCLASS32) - error(EXIT_FAILURE, 0, "Unsupported ELF class"); + fail("Unsupported ELF class\n"); swap = inhdr->e_ident[EI_DATA] != HOST_ORDER; if (read_elf_half(inhdr->e_type, swap) != ET_DYN) - error(EXIT_FAILURE, 0, "Not a shared object"); + fail("Not a shared object\n"); - if (read_elf_half(inhdr->e_machine, swap) != EM_ARM) { - error(EXIT_FAILURE, 0, "Unsupported architecture %#x", - inhdr->e_machine); - } + if (read_elf_half(inhdr->e_machine, swap) != EM_ARM) + fail("Unsupported architecture %#x\n", inhdr->e_machine); e_flags = read_elf_word(inhdr->e_flags, swap); if (EF_ARM_EABI_VERSION(e_flags) != EF_ARM_EABI_VER5) { - error(EXIT_FAILURE, 0, "Unsupported EABI version %#x", - EF_ARM_EABI_VERSION(e_flags)); + fail("Unsupported EABI version %#x\n", + EF_ARM_EABI_VERSION(e_flags)); } if (e_flags & EF_ARM_ABI_FLOAT_HARD) - error(EXIT_FAILURE, 0, - "Unexpected hard-float flag set in e_flags"); + fail("Unexpected hard-float flag set in e_flags\n"); clear_soft_float = !!(e_flags & EF_ARM_ABI_FLOAT_SOFT); outfd = open(outfile, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR); if (outfd < 0) - error(EXIT_FAILURE, errno, "Cannot open %s", outfile); + fail("Cannot open %s: %s\n", outfile, strerror(errno)); if (ftruncate(outfd, stat.st_size) != 0) - error(EXIT_FAILURE, errno, "Cannot truncate %s", outfile); + fail("Cannot truncate %s: %s\n", outfile, strerror(errno)); outbuf = mmap(NULL, stat.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, outfd, 0); if (outbuf == MAP_FAILED) - error(EXIT_FAILURE, errno, "Failed to map %s", outfile); + fail("Failed to map %s: %s\n", outfile, strerror(errno)); close(outfd); @@ -195,7 +205,7 @@ int main(int argc, char **argv) } if (msync(outbuf, stat.st_size, MS_SYNC) != 0) - error(EXIT_FAILURE, errno, "Failed to sync %s", outfile); + fail("Failed to sync %s: %s\n", outfile, strerror(errno)); return EXIT_SUCCESS; } -- cgit From 11b8b25ce4f8acfd3b438683c0c9ade27756c6e8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 3 Jul 2015 12:42:36 +0100 Subject: ARM: fix lockdep unannotated irqs-off warning Wolfram Sang reported an unannotated irqs-off warning from lockdep: WARNING: CPU: 0 PID: 282 at kernel/locking/lockdep.c:3557 check_flags+0x84/0x1f4() DEBUG_LOCKS_WARN_ON(current->hardirqs_enabled) CPU: 0 PID: 282 Comm: rcS Tainted: G W 4.1.0-00002-g5b076054611833 #179 Hardware name: Generic Emma Mobile EV2 (Flattened Device Tree) Backtrace: [] (dump_backtrace) from [] (show_stack+0x18/0x1c) r6:c02dcc67 r5:00000009 r4:00000000 r3:00400000 [] (show_stack) from [] (dump_stack+0x20/0x28) [] (dump_stack) from [] (warn_slowpath_common+0x8c/0xb4) [] (warn_slowpath_common) from [] (warn_slowpath_fmt+0x38/0x40) r8:c780f470 r7:00000000 r6:00000000 r5:c03b0570 r4:c0b7ec04 [] (warn_slowpath_fmt) from [] (check_flags+0x84/0x1f4) r3:c02e13d8 r2:c02dceaa [] (check_flags) from [] (lock_acquire+0x4c/0xbc) r5:00000000 r4:60000193 [] (lock_acquire) from [] (_raw_spin_lock+0x34/0x44) r9:000a8d5c r8:00000001 r7:c7806000 r6:c780f460 r5:c03b06a0 r4:c780f460 [] (_raw_spin_lock) from [] (handle_fasteoi_irq+0x20/0x11c) r4:c780f400 [] (handle_fasteoi_irq) from [] (generic_handle_irq+0x28/0x38) r6:00000000 r5:c03b038c r4:00000012 r3:c005a8ac [] (generic_handle_irq) from [] (__handle_domain_irq+0x88/0xa8) r4:00000000 r3:00000026 [] (__handle_domain_irq) from [] (gic_handle_irq+0x40/0x58) r8:10c5347d r7:10c5347d r6:c35b1fb0 r5:c03a6304 r4:c8802000 r3:c35b1fb0 [] (gic_handle_irq) from [] (__irq_usr+0x48/0x60) Exception stack(0xc35b1fb0 to 0xc35b1ff8) 1fa0: 00000061 00000000 000ab736 00000066 1fc0: 00000061 000aa1f0 000a8d54 000a8d54 000a8d88 000a8d5c 000a8cc8 000a8d68 1fe0: 72727272 bef8a528 000398c0 00031334 20000010 ffffffff r6:ffffffff r5:20000010 r4:00031334 r3:00000061 ---[ end trace cb88537fdc8fa202 ]--- possible reason: unannotated irqs-off. irq event stamp: 769 hardirqs last enabled at (769): [] ret_fast_syscall+0x2c/0x54 hardirqs last disabled at (768): [] ret_fast_syscall+0xc/0x54 softirqs last enabled at (0): [] copy_process.part.65+0x2e8/0x11dc softirqs last disabled at (0): [< (null)>] (null) His kernel configuration had: CONFIG_PROVE_LOCKING=y CONFIG_TRACE_IRQFLAGS=y but no IRQSOFF_TRACER, which means entry from userspace can result in the kernel seeing IRQs off without being notified of that change of state. Change the IRQSOFF ifdef in the usr_entry macro to TRACE_IRQFLAGS instead. Tested-by: Wolfram Sang Signed-off-by: Russell King --- arch/arm/kernel/entry-armv.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 570306c49406..dba6cf65c9e4 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -408,7 +408,7 @@ ENDPROC(__fiq_abt) zero_fp .if \trace -#ifdef CONFIG_IRQSOFF_TRACER +#ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif ct_user_exit save = 0 -- cgit From ac5e2f170f033e48cfcdc2c4f74b27083eabffa5 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 1 Jul 2015 10:02:39 +0100 Subject: ARM: io: document ARM specific behaviour of ioremap*() implementations Add documentation of the ARM specific behaviour of the mappings setup by the ioremap() series of macros. Signed-off-by: Russell King --- arch/arm/include/asm/io.h | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index 1c3938f26beb..ae10717f61d4 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -348,11 +348,47 @@ static inline void memcpy_toio(volatile void __iomem *to, const void *from, #endif /* readl */ /* - * ioremap and friends. + * ioremap() and friends. * - * ioremap takes a PCI memory address, as specified in - * Documentation/io-mapping.txt. + * ioremap() takes a resource address, and size. Due to the ARM memory + * types, it is important to use the correct ioremap() function as each + * mapping has specific properties. * + * Function Memory type Cacheability Cache hint + * ioremap() Device n/a n/a + * ioremap_nocache() Device n/a n/a + * ioremap_cache() Normal Writeback Read allocate + * ioremap_wc() Normal Non-cacheable n/a + * ioremap_wt() Normal Non-cacheable n/a + * + * All device mappings have the following properties: + * - no access speculation + * - no repetition (eg, on return from an exception) + * - number, order and size of accesses are maintained + * - unaligned accesses are "unpredictable" + * - writes may be delayed before they hit the endpoint device + * + * ioremap_nocache() is the same as ioremap() as there are too many device + * drivers using this for device registers, and documentation which tells + * people to use it for such for this to be any different. This is not a + * safe fallback for memory-like mappings, or memory regions where the + * compiler may generate unaligned accesses - eg, via inlining its own + * memcpy. + * + * All normal memory mappings have the following properties: + * - reads can be repeated with no side effects + * - repeated reads return the last value written + * - reads can fetch additional locations without side effects + * - writes can be repeated (in certain cases) with no side effects + * - writes can be merged before accessing the target + * - unaligned accesses can be supported + * - ordering is not guaranteed without explicit dependencies or barrier + * instructions + * - writes may be delayed before they hit the endpoint memory + * + * The cache hint is only a performance hint: CPUs may alias these hints. + * Eg, a CPU not implementing read allocate but implementing write allocate + * will provide a write allocate mapping instead. */ #define ioremap(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) #define ioremap_nocache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) -- cgit From 1e2c727f6c022778d4562147433ca4c0b101f0ad Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 1 Jul 2015 10:17:55 +0100 Subject: ARM: io: fix ioremap_wt() implementation ioremap_wt() was added by aliasing it to ioremap_nocache(), which is a device mapping. Device mappings do not allow unaligned accesses, but it appears that GCC is able to inline its own memcpy() implementation which may use such accesses. The only user of this is pmem, which uses memcpy() on the region. Therefore, this is unsafe. We must implement ioremap_wt() correctly for ARM, or not at all. This patch adds a more correct implementation by re-using ioremap_wc() to provide a normal-memory non-cacheable mapping. Signed-off-by: Russell King --- arch/arm/include/asm/io.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index ae10717f61d4..f1083ebf214d 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -394,7 +394,7 @@ static inline void memcpy_toio(volatile void __iomem *to, const void *from, #define ioremap_nocache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) #define ioremap_cache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_CACHED) #define ioremap_wc(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_WC) -#define ioremap_wt(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) +#define ioremap_wt(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_WC) #define iounmap __arm_iounmap /* -- cgit From 20a1080dff2f1be8933baa0d910c41882c7279ee Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 1 Jul 2015 10:06:32 +0100 Subject: ARM: io: convert ioremap*() to functions Convert the ioremap*() preprocessor macros to real functions, moving them out of line. This allows us to kill off __arm_ioremap(), and __arm_iounmap() helpers, and remove __arm_ioremap_pfn_caller() from global view. Signed-off-by: Russell King --- arch/arm/include/asm/io.h | 24 +++++++++++++----------- arch/arm/mm/ioremap.c | 33 +++++++++++++++++++++++---------- arch/arm/mm/nommu.c | 39 ++++++++++++++++++++++++++------------- 3 files changed, 62 insertions(+), 34 deletions(-) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index f1083ebf214d..a5ed237c87d9 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -140,16 +140,11 @@ static inline u32 __raw_readl(const volatile void __iomem *addr) * The _caller variety takes a __builtin_return_address(0) value for * /proc/vmalloc to use - and should only be used in non-inline functions. */ -extern void __iomem *__arm_ioremap_pfn_caller(unsigned long, unsigned long, - size_t, unsigned int, void *); extern void __iomem *__arm_ioremap_caller(phys_addr_t, size_t, unsigned int, void *); - extern void __iomem *__arm_ioremap_pfn(unsigned long, unsigned long, size_t, unsigned int); -extern void __iomem *__arm_ioremap(phys_addr_t, size_t, unsigned int); extern void __iomem *__arm_ioremap_exec(phys_addr_t, size_t, bool cached); extern void __iounmap(volatile void __iomem *addr); -extern void __arm_iounmap(volatile void __iomem *addr); extern void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); @@ -390,12 +385,19 @@ static inline void memcpy_toio(volatile void __iomem *to, const void *from, * Eg, a CPU not implementing read allocate but implementing write allocate * will provide a write allocate mapping instead. */ -#define ioremap(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) -#define ioremap_nocache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE) -#define ioremap_cache(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_CACHED) -#define ioremap_wc(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_WC) -#define ioremap_wt(cookie,size) __arm_ioremap((cookie), (size), MT_DEVICE_WC) -#define iounmap __arm_iounmap +void __iomem *ioremap(resource_size_t res_cookie, size_t size); +#define ioremap ioremap +#define ioremap_nocache ioremap + +void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size); +#define ioremap_cache ioremap_cache + +void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size); +#define ioremap_wc ioremap_wc +#define ioremap_wt ioremap_wc + +void iounmap(volatile void __iomem *iomem_cookie); +#define iounmap iounmap /* * io{read,write}{16,32}be() macros diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index d1e5ad7ab3bc..0c81056c1dd7 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -255,7 +255,7 @@ remap_area_supersections(unsigned long virt, unsigned long pfn, } #endif -void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, +static void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset, size_t size, unsigned int mtype, void *caller) { const struct mem_type *type; @@ -363,7 +363,7 @@ __arm_ioremap_pfn(unsigned long pfn, unsigned long offset, size_t size, unsigned int mtype) { return __arm_ioremap_pfn_caller(pfn, offset, size, mtype, - __builtin_return_address(0)); + __builtin_return_address(0)); } EXPORT_SYMBOL(__arm_ioremap_pfn); @@ -371,13 +371,26 @@ void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *) = __arm_ioremap_caller; -void __iomem * -__arm_ioremap(phys_addr_t phys_addr, size_t size, unsigned int mtype) +void __iomem *ioremap(resource_size_t res_cookie, size_t size) +{ + return arch_ioremap_caller(res_cookie, size, MT_DEVICE, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap); + +void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size) +{ + return arch_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_cache); + +void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size) { - return arch_ioremap_caller(phys_addr, size, mtype, - __builtin_return_address(0)); + return arch_ioremap_caller(res_cookie, size, MT_DEVICE_WC, + __builtin_return_address(0)); } -EXPORT_SYMBOL(__arm_ioremap); +EXPORT_SYMBOL(ioremap_wc); /* * Remap an arbitrary physical address space into the kernel virtual @@ -431,11 +444,11 @@ void __iounmap(volatile void __iomem *io_addr) void (*arch_iounmap)(volatile void __iomem *) = __iounmap; -void __arm_iounmap(volatile void __iomem *io_addr) +void iounmap(volatile void __iomem *cookie) { - arch_iounmap(io_addr); + arch_iounmap(cookie); } -EXPORT_SYMBOL(__arm_iounmap); +EXPORT_SYMBOL(iounmap); #ifdef CONFIG_PCI static int pci_ioremap_mem_type = MT_DEVICE; diff --git a/arch/arm/mm/nommu.c b/arch/arm/mm/nommu.c index afd7e05d95f1..1dd10936d68d 100644 --- a/arch/arm/mm/nommu.c +++ b/arch/arm/mm/nommu.c @@ -351,30 +351,43 @@ void __iomem *__arm_ioremap_pfn(unsigned long pfn, unsigned long offset, } EXPORT_SYMBOL(__arm_ioremap_pfn); -void __iomem *__arm_ioremap_pfn_caller(unsigned long pfn, unsigned long offset, - size_t size, unsigned int mtype, void *caller) +void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size, + unsigned int mtype, void *caller) { - return __arm_ioremap_pfn(pfn, offset, size, mtype); + return (void __iomem *)phys_addr; } -void __iomem *__arm_ioremap(phys_addr_t phys_addr, size_t size, - unsigned int mtype) +void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); + +void __iomem *ioremap(resource_size_t res_cookie, size_t size) { - return (void __iomem *)phys_addr; + return __arm_ioremap_caller(res_cookie, size, MT_DEVICE, + __builtin_return_address(0)); } -EXPORT_SYMBOL(__arm_ioremap); +EXPORT_SYMBOL(ioremap); -void __iomem * (*arch_ioremap_caller)(phys_addr_t, size_t, unsigned int, void *); +void __iomem *ioremap_cache(resource_size_t res_cookie, size_t size) +{ + return __arm_ioremap_caller(res_cookie, size, MT_DEVICE_CACHED, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_cache); -void __iomem *__arm_ioremap_caller(phys_addr_t phys_addr, size_t size, - unsigned int mtype, void *caller) +void __iomem *ioremap_wc(resource_size_t res_cookie, size_t size) +{ + return __arm_ioremap_caller(res_cookie, size, MT_DEVICE_WC, + __builtin_return_address(0)); +} +EXPORT_SYMBOL(ioremap_wc); + +void __iounmap(volatile void __iomem *addr) { - return __arm_ioremap(phys_addr, size, mtype); } +EXPORT_SYMBOL(__iounmap); void (*arch_iounmap)(volatile void __iomem *); -void __arm_iounmap(volatile void __iomem *addr) +void iounmap(volatile void __iomem *addr) { } -EXPORT_SYMBOL(__arm_iounmap); +EXPORT_SYMBOL(iounmap); -- cgit From 9ab79bb22cc77adcca5ebbadea6caec6a478f283 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 1 Jul 2015 15:23:10 +0100 Subject: ARM: pgtable: document mapping types Signed-off-by: Russell King --- arch/arm/include/asm/pgtable-2level.h | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index bfd662e49a25..aeddd28b3595 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -129,7 +129,36 @@ /* * These are the memory types, defined to be compatible with - * pre-ARMv6 CPUs cacheable and bufferable bits: XXCB + * pre-ARMv6 CPUs cacheable and bufferable bits: n/a,n/a,C,B + * ARMv6+ without TEX remapping, they are a table index. + * ARMv6+ with TEX remapping, they correspond to n/a,TEX(0),C,B + * + * MT type Pre-ARMv6 ARMv6+ type / cacheable status + * UNCACHED Uncached Strongly ordered + * BUFFERABLE Bufferable Normal memory / non-cacheable + * WRITETHROUGH Writethrough Normal memory / write through + * WRITEBACK Writeback Normal memory / write back, read alloc + * MINICACHE Minicache N/A + * WRITEALLOC Writeback Normal memory / write back, write alloc + * DEV_SHARED Uncached Device memory (shared) + * DEV_NONSHARED Uncached Device memory (non-shared) + * DEV_WC Bufferable Normal memory / non-cacheable + * DEV_CACHED Writeback Normal memory / write back, read alloc + * VECTORS Variable Normal memory / variable + * + * All normal memory mappings have the following properties: + * - reads can be repeated with no side effects + * - repeated reads return the last value written + * - reads can fetch additional locations without side effects + * - writes can be repeated (in certain cases) with no side effects + * - writes can be merged before accessing the target + * - unaligned accesses can be supported + * + * All device mappings have the following properties: + * - no access speculation + * - no repetition (eg, on return from an exception) + * - number, order and size of accesses are maintained + * - unaligned accesses are "unpredictable" */ #define L_PTE_MT_UNCACHED (_AT(pteval_t, 0x00) << 2) /* 0000 */ #define L_PTE_MT_BUFFERABLE (_AT(pteval_t, 0x01) << 2) /* 0001 */ -- cgit From 1bd46782d08b01b73df0085b51ea1021b19b44fd Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 3 Jul 2015 15:22:54 +0100 Subject: ARM: avoid unwanted GCC memset()/memcpy() optimisations for IO variants We don't want GCC optimising our memset_io(), memcpy_fromio() or memcpy_toio() variants, so we must not call one of the standard functions. Provide a separate name for our assembly memcpy() and memset() functions, and use that instead, thereby bypassing GCC's ability to optimise these operations. GCCs optimisation may introduce unaligned accesses which are invalid for device mappings. Signed-off-by: Russell King --- arch/arm/include/asm/io.h | 9 ++++++--- arch/arm/kernel/armksyms.c | 6 ++++++ arch/arm/lib/memcpy.S | 2 ++ arch/arm/lib/memset.S | 2 ++ 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/io.h b/arch/arm/include/asm/io.h index a5ed237c87d9..485982084fe9 100644 --- a/arch/arm/include/asm/io.h +++ b/arch/arm/include/asm/io.h @@ -316,21 +316,24 @@ extern void _memset_io(volatile void __iomem *, int, size_t); static inline void memset_io(volatile void __iomem *dst, unsigned c, size_t count) { - memset((void __force *)dst, c, count); + extern void mmioset(void *, unsigned int, size_t); + mmioset((void __force *)dst, c, count); } #define memset_io(dst,c,count) memset_io(dst,c,count) static inline void memcpy_fromio(void *to, const volatile void __iomem *from, size_t count) { - memcpy(to, (const void __force *)from, count); + extern void mmiocpy(void *, const void *, size_t); + mmiocpy(to, (const void __force *)from, count); } #define memcpy_fromio(to,from,count) memcpy_fromio(to,from,count) static inline void memcpy_toio(volatile void __iomem *to, const void *from, size_t count) { - memcpy((void __force *)to, from, count); + extern void mmiocpy(void *, const void *, size_t); + mmiocpy((void __force *)to, from, count); } #define memcpy_toio(to,from,count) memcpy_toio(to,from,count) diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index a88671cfe1ff..5e5a51a99e68 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -50,6 +50,9 @@ extern void __aeabi_ulcmp(void); extern void fpundefinstr(void); +void mmioset(void *, unsigned int, size_t); +void mmiocpy(void *, const void *, size_t); + /* platform dependent support */ EXPORT_SYMBOL(arm_delay_ops); @@ -88,6 +91,9 @@ EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memchr); EXPORT_SYMBOL(__memzero); +EXPORT_SYMBOL(mmioset); +EXPORT_SYMBOL(mmiocpy); + #ifdef CONFIG_MMU EXPORT_SYMBOL(copy_page); diff --git a/arch/arm/lib/memcpy.S b/arch/arm/lib/memcpy.S index 7797e81e40e0..64111bd4440b 100644 --- a/arch/arm/lib/memcpy.S +++ b/arch/arm/lib/memcpy.S @@ -61,8 +61,10 @@ /* Prototype: void *memcpy(void *dest, const void *src, size_t n); */ +ENTRY(mmiocpy) ENTRY(memcpy) #include "copy_template.S" ENDPROC(memcpy) +ENDPROC(mmiocpy) diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index a4ee97b5a2bf..3c65e3bd790f 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -16,6 +16,7 @@ .text .align 5 +ENTRY(mmioset) ENTRY(memset) UNWIND( .fnstart ) ands r3, r0, #3 @ 1 unaligned? @@ -133,3 +134,4 @@ UNWIND( .fnstart ) b 1b UNWIND( .fnend ) ENDPROC(memset) +ENDPROC(mmioset) -- cgit From 9705acd63b125dee8b15c705216d7186daea4625 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 3 Jul 2015 21:13:55 -0400 Subject: ext4: fix reservation release on invalidatepage for delalloc fs On delalloc enabled file system on invalidatepage operation in ext4_da_page_release_reservation() we want to clear the delayed buffer and remove the extent covering the delayed buffer from the extent status tree. However currently there is a bug where on the systems with page size > block size we will always remove extents from the start of the page regardless where the actual delayed buffers are positioned in the page. This leads to the errors like this: EXT4-fs warning (device loop0): ext4_da_release_space:1225: ext4_da_release_space: ino 13, to_free 1 with only 0 reserved data blocks This however can cause data loss on writeback time if the file system is in ENOSPC condition because we're releasing reservation for someones else delayed buffer. Fix this by only removing extents that corresponds to the part of the page we want to invalidate. This problem is reproducible by the following fio receipt (however I was only able to reproduce it with fio-2.1 or older. [global] bs=8k iodepth=1024 iodepth_batch=60 randrepeat=1 size=1m directory=/mnt/test numjobs=20 [job1] ioengine=sync bs=1k direct=1 rw=randread filename=file1:file2 [job2] ioengine=libaio rw=randwrite direct=1 filename=file1:file2 [job3] bs=1k ioengine=posixaio rw=randwrite direct=1 filename=file1:file2 [job5] bs=1k ioengine=sync rw=randread filename=file1:file2 [job7] ioengine=libaio rw=randwrite filename=file1:file2 [job8] ioengine=posixaio rw=randwrite filename=file1:file2 [job10] ioengine=mmap rw=randwrite bs=1k filename=file1:file2 [job11] ioengine=mmap rw=randwrite direct=1 filename=file1:file2 Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara Cc: stable@vger.kernel.org --- fs/ext4/inode.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4ad73d3c1003..2334e86d7447 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1330,7 +1330,7 @@ static void ext4_da_page_release_reservation(struct page *page, unsigned int offset, unsigned int length) { - int to_release = 0; + int to_release = 0, contiguous_blks = 0; struct buffer_head *head, *bh; unsigned int curr_off = 0; struct inode *inode = page->mapping->host; @@ -1351,14 +1351,23 @@ static void ext4_da_page_release_reservation(struct page *page, if ((offset <= curr_off) && (buffer_delay(bh))) { to_release++; + contiguous_blks++; clear_buffer_delay(bh); + } else if (contiguous_blks) { + lblk = page->index << + (PAGE_CACHE_SHIFT - inode->i_blkbits); + lblk += (curr_off >> inode->i_blkbits) - + contiguous_blks; + ext4_es_remove_extent(inode, lblk, contiguous_blks); + contiguous_blks = 0; } curr_off = next_off; } while ((bh = bh->b_this_page) != head); - if (to_release) { + if (contiguous_blks) { lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); - ext4_es_remove_extent(inode, lblk, to_release); + lblk += (curr_off >> inode->i_blkbits) - contiguous_blks; + ext4_es_remove_extent(inode, lblk, contiguous_blks); } /* If we have released all the blocks belonging to a cluster, then we -- cgit From d6f123a9297496ad0b6335fe881504c4b5b2a5e5 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Fri, 3 Jul 2015 23:56:50 -0400 Subject: ext4: be more strict when migrating to non-extent based file Currently the check in ext4_ind_migrate() is not enough before doing the real conversion: a) delayed allocated extents could bypass the check on eh->eh_entries and eh->eh_depth This can be demonstrated by this script xfs_io -fc "pwrite 0 4k" -c "pwrite 8k 4k" /mnt/ext4/testfile chattr -e /mnt/ext4/testfile where testfile has two extents but still be converted to non-extent based file format. b) only extent length is checked but not the offset, which would result in data lose (delalloc) or fs corruption (nodelalloc), because non-extent based file only supports at most (12 + 2^10 + 2^20 + 2^30) blocks This can be demostrated by xfs_io -fc "pwrite 5T 4k" /mnt/ext4/testfile chattr -e /mnt/ext4/testfile sync If delalloc is enabled, dmesg prints EXT4-fs warning (device dm-4): ext4_block_to_path:105: block 1342177280 > max in inode 53 EXT4-fs (dm-4): Delayed block allocation failed for inode 53 at logical offset 1342177280 with max blocks 1 with error 5 EXT4-fs (dm-4): This should not happen!! Data will be lost If delalloc is disabled, e2fsck -nf shows corruption Inode 53, i_size is 5497558142976, should be 4096. Fix? no Fix the two issues by a) forcing all delayed allocation blocks to be allocated before checking eh->eh_depth and eh->eh_entries b) limiting the last logical block of the extent is within direct map Signed-off-by: Eryu Guan Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/migrate.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index b52374e42102..6d8b0c917364 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -620,6 +620,7 @@ int ext4_ind_migrate(struct inode *inode) struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_extent *ex; unsigned int i, len; + ext4_lblk_t end; ext4_fsblk_t blk; handle_t *handle; int ret; @@ -633,6 +634,14 @@ int ext4_ind_migrate(struct inode *inode) EXT4_FEATURE_RO_COMPAT_BIGALLOC)) return -EOPNOTSUPP; + /* + * In order to get correct extent info, force all delayed allocation + * blocks to be allocated, otherwise delayed allocation blocks may not + * be reflected and bypass the checks on extent header. + */ + if (test_opt(inode->i_sb, DELALLOC)) + ext4_alloc_da_blocks(inode); + handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -654,7 +663,8 @@ int ext4_ind_migrate(struct inode *inode) else { len = le16_to_cpu(ex->ee_len); blk = ext4_ext_pblock(ex); - if (len > EXT4_NDIR_BLOCKS) { + end = le32_to_cpu(ex->ee_block) + len - 1; + if (end >= EXT4_NDIR_BLOCKS) { ret = -EOPNOTSUPP; goto errout; } -- cgit From 8974fec7d72e3e02752fe0f27b4c3719c78d9a15 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Sat, 4 Jul 2015 00:03:44 -0400 Subject: ext4: correctly migrate a file with a hole at the beginning Currently ext4_ind_migrate() doesn't correctly handle a file which contains a hole at the beginning of the file. This caused the migration to be done incorrectly, and then if there is a subsequent following delayed allocation write to the "hole", this would reclaim the same data blocks again and results in fs corruption. # assmuing 4k block size ext4, with delalloc enabled # skip the first block and write to the second block xfs_io -fc "pwrite 4k 4k" -c "fsync" /mnt/ext4/testfile # converting to indirect-mapped file, which would move the data blocks # to the beginning of the file, but extent status cache still marks # that region as a hole chattr -e /mnt/ext4/testfile # delayed allocation writes to the "hole", reclaim the same data block # again, results in i_blocks corruption xfs_io -c "pwrite 0 4k" /mnt/ext4/testfile umount /mnt/ext4 e2fsck -nf /dev/sda6 ... Inode 53, i_blocks is 16, should be 8. Fix? no ... Signed-off-by: Eryu Guan Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/migrate.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 6d8b0c917364..6163ad21cb0e 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -620,7 +620,7 @@ int ext4_ind_migrate(struct inode *inode) struct ext4_inode_info *ei = EXT4_I(inode); struct ext4_extent *ex; unsigned int i, len; - ext4_lblk_t end; + ext4_lblk_t start, end; ext4_fsblk_t blk; handle_t *handle; int ret; @@ -659,11 +659,12 @@ int ext4_ind_migrate(struct inode *inode) goto errout; } if (eh->eh_entries == 0) - blk = len = 0; + blk = len = start = end = 0; else { len = le16_to_cpu(ex->ee_len); blk = ext4_ext_pblock(ex); - end = le32_to_cpu(ex->ee_block) + len - 1; + start = le32_to_cpu(ex->ee_block); + end = start + len - 1; if (end >= EXT4_NDIR_BLOCKS) { ret = -EOPNOTSUPP; goto errout; @@ -672,7 +673,7 @@ int ext4_ind_migrate(struct inode *inode) ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); memset(ei->i_data, 0, sizeof(ei->i_data)); - for (i=0; i < len; i++) + for (i = start; i <= end; i++) ei->i_data[i] = cpu_to_le32(blk++); ext4_mark_inode_dirty(handle, inode); errout: -- cgit From d90a45b1e005da3351de795d2ba6d52dc5a28837 Mon Sep 17 00:00:00 2001 From: Jens Kuske Date: Fri, 15 May 2015 18:38:51 +0200 Subject: Documentation: sunxi: Update Allwinner SoC documentation There are some new Allwinner SoCs available, namely A33, A83T and H3. Update the documentation to mention those and the related documents. Signed-off-by: Jens Kuske Signed-off-by: Maxime Ripard --- Documentation/arm/sunxi/README | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/Documentation/arm/sunxi/README b/Documentation/arm/sunxi/README index 1fe2d7fd4108..5e38e1582f95 100644 --- a/Documentation/arm/sunxi/README +++ b/Documentation/arm/sunxi/README @@ -36,7 +36,7 @@ SunXi family + User Manual http://dl.linux-sunxi.org/A20/A20%20User%20Manual%202013-03-22.pdf - - Allwinner A23 + - Allwinner A23 (sun8i) + Datasheet http://dl.linux-sunxi.org/A23/A23%20Datasheet%20V1.0%2020130830.pdf + User Manual @@ -55,7 +55,23 @@ SunXi family + User Manual http://dl.linux-sunxi.org/A31/A3x_release_document/A31s/IC/A31s%20User%20Manual%20%20V1.0%2020130322.pdf + - Allwinner A33 (sun8i) + + Datasheet + http://dl.linux-sunxi.org/A33/A33%20Datasheet%20release%201.1.pdf + + User Manual + http://dl.linux-sunxi.org/A33/A33%20user%20manual%20release%201.1.pdf + + - Allwinner H3 (sun8i) + + Datasheet + http://dl.linux-sunxi.org/H3/Allwinner_H3_Datasheet_V1.0.pdf + * Quad ARM Cortex-A15, Quad ARM Cortex-A7 based SoCs - Allwinner A80 + Datasheet http://dl.linux-sunxi.org/A80/A80_Datasheet_Revision_1.0_0404.pdf + + * Octa ARM Cortex-A7 based SoCs + - Allwinner A83T + + Not Supported + + Datasheet + http://dl.linux-sunxi.org/A83T/A83T_datasheet_Revision_1.1.pdf -- cgit From 14a882df14a5ae859b245bc708ce3fce47a91594 Mon Sep 17 00:00:00 2001 From: Jens Kuske Date: Fri, 15 May 2015 18:38:55 +0200 Subject: ARM: sunxi: Introduce Allwinner H3 support The Allwinner H3 is a quad-core Cortex-A7-based SoC. It is very similar to other sun8i family SoCs like the A23. Signed-off-by: Jens Kuske Signed-off-by: Maxime Ripard --- Documentation/devicetree/bindings/arm/sunxi.txt | 1 + arch/arm/mach-sunxi/Kconfig | 2 +- arch/arm/mach-sunxi/sunxi.c | 4 +++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Documentation/devicetree/bindings/arm/sunxi.txt b/Documentation/devicetree/bindings/arm/sunxi.txt index 42941fdefb11..3cb4b946ff2b 100644 --- a/Documentation/devicetree/bindings/arm/sunxi.txt +++ b/Documentation/devicetree/bindings/arm/sunxi.txt @@ -9,4 +9,5 @@ using one of the following compatible strings: allwinner,sun6i-a31 allwinner,sun7i-a20 allwinner,sun8i-a23 + allwinner,sun8i-h3 allwinner,sun9i-a80 diff --git a/arch/arm/mach-sunxi/Kconfig b/arch/arm/mach-sunxi/Kconfig index 81502b90dd91..4efe2d43a126 100644 --- a/arch/arm/mach-sunxi/Kconfig +++ b/arch/arm/mach-sunxi/Kconfig @@ -35,7 +35,7 @@ config MACH_SUN7I select SUN5I_HSTIMER config MACH_SUN8I - bool "Allwinner A23 (sun8i) SoCs support" + bool "Allwinner sun8i Family SoCs support" default ARCH_SUNXI select ARM_GIC select MFD_SUN6I_PRCM diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c index 1bc811a74a9f..82709020c57c 100644 --- a/arch/arm/mach-sunxi/sunxi.c +++ b/arch/arm/mach-sunxi/sunxi.c @@ -67,10 +67,12 @@ MACHINE_END static const char * const sun8i_board_dt_compat[] = { "allwinner,sun8i-a23", + "allwinner,sun8i-h3", NULL, }; -DT_MACHINE_START(SUN8I_DT, "Allwinner sun8i (A23) Family") +DT_MACHINE_START(SUN8I_DT, "Allwinner sun8i Family") + .init_time = sun6i_timer_init, .dt_compat = sun8i_board_dt_compat, .init_late = sunxi_dt_cpufreq_init, MACHINE_END -- cgit From 159870d2413c92622790e9cecbce95099bed539a Mon Sep 17 00:00:00 2001 From: Vishnu Patekar Date: Sat, 30 May 2015 16:55:01 +0200 Subject: ARM: sunxi: Add Machine support for A33 Add machine support for the Allwinner A33 quad core cortex-a7 based SoC, which is similar to the A23 SoC. Signed-off-by: Vishnu Patekar Signed-off-by: Hans de Goede Signed-off-by: Maxime Ripard Tested-by: Chen-Yu Tsai --- Documentation/devicetree/bindings/arm/sunxi.txt | 1 + arch/arm/mach-sunxi/sunxi.c | 1 + drivers/clk/sunxi/clk-sunxi.c | 1 + 3 files changed, 3 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/sunxi.txt b/Documentation/devicetree/bindings/arm/sunxi.txt index 3cb4b946ff2b..67da20539540 100644 --- a/Documentation/devicetree/bindings/arm/sunxi.txt +++ b/Documentation/devicetree/bindings/arm/sunxi.txt @@ -9,5 +9,6 @@ using one of the following compatible strings: allwinner,sun6i-a31 allwinner,sun7i-a20 allwinner,sun8i-a23 + allwinner,sun8i-a33 allwinner,sun8i-h3 allwinner,sun9i-a80 diff --git a/arch/arm/mach-sunxi/sunxi.c b/arch/arm/mach-sunxi/sunxi.c index 82709020c57c..65bab2876343 100644 --- a/arch/arm/mach-sunxi/sunxi.c +++ b/arch/arm/mach-sunxi/sunxi.c @@ -67,6 +67,7 @@ MACHINE_END static const char * const sun8i_board_dt_compat[] = { "allwinner,sun8i-a23", + "allwinner,sun8i-a33", "allwinner,sun8i-h3", NULL, }; diff --git a/drivers/clk/sunxi/clk-sunxi.c b/drivers/clk/sunxi/clk-sunxi.c index 7e1e2bd189b6..6d25e4e9dfec 100644 --- a/drivers/clk/sunxi/clk-sunxi.c +++ b/drivers/clk/sunxi/clk-sunxi.c @@ -1389,6 +1389,7 @@ static void __init sun6i_init_clocks(struct device_node *node) CLK_OF_DECLARE(sun6i_a31_clk_init, "allwinner,sun6i-a31", sun6i_init_clocks); CLK_OF_DECLARE(sun6i_a31s_clk_init, "allwinner,sun6i-a31s", sun6i_init_clocks); CLK_OF_DECLARE(sun8i_a23_clk_init, "allwinner,sun8i-a23", sun6i_init_clocks); +CLK_OF_DECLARE(sun8i_a33_clk_init, "allwinner,sun8i-a33", sun6i_init_clocks); static void __init sun9i_init_clocks(struct device_node *node) { -- cgit From 07d1f344159764bcd4e228e4a65e804dfe59dfe9 Mon Sep 17 00:00:00 2001 From: Timo Sigurdsson Date: Mon, 6 Apr 2015 22:57:22 +0200 Subject: ARM: Remove deprecated symbol from defconfig files Commit b2b3a8b934e6 ("power/reset: Remove sun6i reboot driver") removed the sun6i reboot driver. But sunxi_defconfig and multi_v7_defconfig still contain the symbol CONFIG_POWER_RESET_SUN6I that was deprecated by that commit, so remove it. Signed-off-by: Timo Sigurdsson Signed-off-by: Maxime Ripard --- arch/arm/configs/multi_v7_defconfig | 1 - arch/arm/configs/sunxi_defconfig | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index ab86655c1f4b..22df0021d265 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -319,7 +319,6 @@ CONFIG_POWER_RESET_AS3722=y CONFIG_POWER_RESET_GPIO=y CONFIG_POWER_RESET_GPIO_RESTART=y CONFIG_POWER_RESET_KEYSTONE=y -CONFIG_POWER_RESET_SUN6I=y CONFIG_POWER_RESET_RMOBILE=y CONFIG_SENSORS_LM90=y CONFIG_SENSORS_LM95245=y diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig index 8ecba00dcd83..2c7af0a57cb1 100644 --- a/arch/arm/configs/sunxi_defconfig +++ b/arch/arm/configs/sunxi_defconfig @@ -77,7 +77,6 @@ CONFIG_SPI_SUN6I=y CONFIG_GPIO_SYSFS=y CONFIG_POWER_SUPPLY=y CONFIG_POWER_RESET=y -CONFIG_POWER_RESET_SUN6I=y CONFIG_THERMAL=y CONFIG_CPU_THERMAL=y CONFIG_WATCHDOG=y -- cgit From 5738563bf6f2f1631dc71a35293b135f90969a35 Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 20 Apr 2015 21:48:40 +0200 Subject: ARM: sunxi: Enable simplefb in the defconfig Now that we have simplefb support, we can enable it in our defconfig. Also enable the framebuffer console, so that we are sure that we actually get something displayed in any case. And while we're at it, enable the module support. Signed-off-by: Maxime Ripard --- arch/arm/configs/sunxi_defconfig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig index 2c7af0a57cb1..7ebc346bf9fa 100644 --- a/arch/arm/configs/sunxi_defconfig +++ b/arch/arm/configs/sunxi_defconfig @@ -2,6 +2,7 @@ CONFIG_NO_HZ=y CONFIG_HIGH_RES_TIMERS=y CONFIG_BLK_DEV_INITRD=y CONFIG_PERF_EVENTS=y +CONFIG_MODULES=y CONFIG_ARCH_SUNXI=y CONFIG_SMP=y CONFIG_NR_CPUS=8 @@ -86,6 +87,10 @@ CONFIG_REGULATOR=y CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_REGULATOR_AXP20X=y CONFIG_REGULATOR_GPIO=y +CONFIG_FB=y +CONFIG_FB_SIMPLE=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y CONFIG_USB=y CONFIG_USB_EHCI_HCD=y CONFIG_USB_EHCI_HCD_PLATFORM=y -- cgit From 7444a072c387a93ebee7066e8aee776954ab0e41 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Sun, 5 Jul 2015 12:33:44 -0400 Subject: ext4: replace open coded nofail allocation in ext4_free_blocks() ext4_free_blocks is looping around the allocation request and mimics __GFP_NOFAIL behavior without any allocation fallback strategy. Let's remove the open coded loop and replace it with __GFP_NOFAIL. Without the flag the allocator has no way to find out never-fail requirement and cannot help in any way. Signed-off-by: Michal Hocko Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/ext4/mballoc.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 1c535fa67640..2299d629eeb1 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -4815,18 +4815,12 @@ do_more: /* * blocks being freed are metadata. these blocks shouldn't * be used until this transaction is committed + * + * We use __GFP_NOFAIL because ext4_free_blocks() is not allowed + * to fail. */ - retry: - new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); - if (!new_entry) { - /* - * We use a retry loop because - * ext4_free_blocks() is not allowed to fail. - */ - cond_resched(); - congestion_wait(BLK_RW_ASYNC, HZ/50); - goto retry; - } + new_entry = kmem_cache_alloc(ext4_free_data_cachep, + GFP_NOFS|__GFP_NOFAIL); new_entry->efd_start_cluster = bit; new_entry->efd_group = block_group; new_entry->efd_count = count_clusters; -- cgit From 728abda6a6654ee7f4e903dc921c6307065e1644 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 5 Jul 2015 14:12:42 -0300 Subject: tools: Adopt {READ,WRITE_ONCE} from the kernel We need it to build rbtree.c after this cset: commit d72da4a4d973 Author: Peter Zijlstra Date: Wed May 27 11:09:36 2015 +0930 rbtree: Make lockless searches non-fatal Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-qlnzhezv5ddwst0w9fydju0y@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/compiler.h | 58 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index f0e72674c52d..9098083869c8 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -41,4 +41,62 @@ #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) +#include + +static __always_inline void __read_once_size(const volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(__u8 *)res = *(volatile __u8 *)p; break; + case 2: *(__u16 *)res = *(volatile __u16 *)p; break; + case 4: *(__u32 *)res = *(volatile __u32 *)p; break; + case 8: *(__u64 *)res = *(volatile __u64 *)p; break; + default: + barrier(); + __builtin_memcpy((void *)res, (const void *)p, size); + barrier(); + } +} + +static __always_inline void __write_once_size(volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(volatile __u8 *)p = *(__u8 *)res; break; + case 2: *(volatile __u16 *)p = *(__u16 *)res; break; + case 4: *(volatile __u32 *)p = *(__u32 *)res; break; + case 8: *(volatile __u64 *)p = *(__u64 *)res; break; + default: + barrier(); + __builtin_memcpy((void *)p, (const void *)res, size); + barrier(); + } +} + +/* + * Prevent the compiler from merging or refetching reads or writes. The + * compiler is also forbidden from reordering successive instances of + * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the + * compiler is aware of some particular ordering. One way to make the + * compiler aware of ordering is to put the two invocations of READ_ONCE, + * WRITE_ONCE or ACCESS_ONCE() in different C statements. + * + * In contrast to ACCESS_ONCE these two macros will also work on aggregate + * data types like structs or unions. If the size of the accessed data + * type exceeds the word size of the machine (e.g., 32 bits or 64 bits) + * READ_ONCE() and WRITE_ONCE() will fall back to memcpy and print a + * compile-time warning. + * + * Their two major use cases are: (1) Mediating communication between + * process-level code and irq/NMI handlers, all running on the same CPU, + * and (2) Ensuring that the compiler does not fold, spindle, or otherwise + * mutilate accesses that either do not require ordering or that interact + * with an explicit memory barrier or atomic instruction that provides the + * required ordering. + */ + +#define READ_ONCE(x) \ + ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) + +#define WRITE_ONCE(x, val) \ + ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) + #endif /* _TOOLS_LINUX_COMPILER_H */ -- cgit From 4407f967441aa1adfc11f739e8e9ec0f38fa839f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 5 Jul 2015 14:17:04 -0300 Subject: perf tools: Copy rbtree.h from the kernel We were using the include/linux/rbtree.h directly from the kernel, which broke the build as soon as it started using rcupdate.h, to avoid dragging the rcu header files into tools/, for which there is no use so far, grab a copy of rbtree.h. This is the minimal fix, later patches will copy as well lib/rbtree.c and move rbtree.h into tools/include/, etc. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-dfmuj0j63w4by7vhlh4hhn74@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/MANIFEST | 1 - tools/perf/util/include/linux/rbtree.h | 92 +++++++++++++++++++++++++++++++++- 2 files changed, 90 insertions(+), 3 deletions(-) diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index fe50a1b34aa0..6504b727f450 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -51,7 +51,6 @@ include/asm-generic/bitops/fls64.h include/asm-generic/bitops/__fls.h include/asm-generic/bitops/fls.h include/linux/perf_event.h -include/linux/rbtree.h include/linux/list.h include/linux/hash.h include/linux/stringify.h diff --git a/tools/perf/util/include/linux/rbtree.h b/tools/perf/util/include/linux/rbtree.h index f06d89f0b867..112582253dd0 100644 --- a/tools/perf/util/include/linux/rbtree.h +++ b/tools/perf/util/include/linux/rbtree.h @@ -1,7 +1,95 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/include/linux/rbtree.h + + To use rbtrees you'll have to implement your own insert and search cores. + This will avoid us to use callbacks and to drop drammatically performances. + I know it's not the cleaner way, but in C (not in C++) to get + performances and genericity... + + See Documentation/rbtree.txt for documentation and samples. +*/ + #ifndef __TOOLS_LINUX_PERF_RBTREE_H #define __TOOLS_LINUX_PERF_RBTREE_H -#include -#include "../../../../include/linux/rbtree.h" + +#include +#include + +struct rb_node { + unsigned long __rb_parent_color; + struct rb_node *rb_right; + struct rb_node *rb_left; +} __attribute__((aligned(sizeof(long)))); + /* The alignment might seem pointless, but allegedly CRIS needs it */ + +struct rb_root { + struct rb_node *rb_node; +}; + + +#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) + +#define RB_ROOT (struct rb_root) { NULL, } +#define rb_entry(ptr, type, member) container_of(ptr, type, member) + +#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) + +/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */ +#define RB_EMPTY_NODE(node) \ + ((node)->__rb_parent_color == (unsigned long)(node)) +#define RB_CLEAR_NODE(node) \ + ((node)->__rb_parent_color = (unsigned long)(node)) + + +extern void rb_insert_color(struct rb_node *, struct rb_root *); +extern void rb_erase(struct rb_node *, struct rb_root *); + + +/* Find logical next and previous nodes in a tree */ +extern struct rb_node *rb_next(const struct rb_node *); +extern struct rb_node *rb_prev(const struct rb_node *); +extern struct rb_node *rb_first(const struct rb_root *); +extern struct rb_node *rb_last(const struct rb_root *); + +/* Postorder iteration - always visit the parent after its children */ +extern struct rb_node *rb_first_postorder(const struct rb_root *); +extern struct rb_node *rb_next_postorder(const struct rb_node *); + +/* Fast replacement of a single node without remove/rebalance/add/rebalance */ +extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, + struct rb_root *root); + +static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, + struct rb_node **rb_link) +{ + node->__rb_parent_color = (unsigned long)parent; + node->rb_left = node->rb_right = NULL; + + *rb_link = node; +} + +#define rb_entry_safe(ptr, type, member) \ + ({ typeof(ptr) ____ptr = (ptr); \ + ____ptr ? rb_entry(____ptr, type, member) : NULL; \ + }) + /* * Handy for checking that we are not deleting an entry that is -- cgit From 3f735377bfd6567d80815a6242c147211963680a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 5 Jul 2015 22:48:21 -0300 Subject: tools: Copy lib/rbtree.c to tools/lib/ So that we can remove kernel specific stuff we've been stubbing out via a tools/include/linux/export.h that gets removed in this patch and to avoid breakages in the future like the one fixed recently where rcupdate.h started being used in rbtree.h. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-rxuzfsozpb8hv1emwpx06rm6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/export.h | 10 - tools/lib/rbtree.c | 548 +++++++++++++++++++++++++++++++++++++++++++ tools/perf/MANIFEST | 2 +- tools/perf/util/Build | 2 +- 4 files changed, 550 insertions(+), 12 deletions(-) delete mode 100644 tools/include/linux/export.h create mode 100644 tools/lib/rbtree.c diff --git a/tools/include/linux/export.h b/tools/include/linux/export.h deleted file mode 100644 index d07e586b9ba0..000000000000 --- a/tools/include/linux/export.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _TOOLS_LINUX_EXPORT_H_ -#define _TOOLS_LINUX_EXPORT_H_ - -#define EXPORT_SYMBOL(sym) -#define EXPORT_SYMBOL_GPL(sym) -#define EXPORT_SYMBOL_GPL_FUTURE(sym) -#define EXPORT_UNUSED_SYMBOL(sym) -#define EXPORT_UNUSED_SYMBOL_GPL(sym) - -#endif diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c new file mode 100644 index 000000000000..17c2b596f043 --- /dev/null +++ b/tools/lib/rbtree.c @@ -0,0 +1,548 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + (C) 2002 David Woodhouse + (C) 2012 Michel Lespinasse + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/lib/rbtree.c +*/ + +#include + +/* + * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree + * + * 1) A node is either red or black + * 2) The root is black + * 3) All leaves (NULL) are black + * 4) Both children of every red node are black + * 5) Every simple path from root to leaves contains the same number + * of black nodes. + * + * 4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two + * consecutive red nodes in a path and every red node is therefore followed by + * a black. So if B is the number of black nodes on every simple path (as per + * 5), then the longest possible path due to 4 is 2B. + * + * We shall indicate color with case, where black nodes are uppercase and red + * nodes will be lowercase. Unknown color nodes shall be drawn as red within + * parentheses and have some accompanying text comment. + */ + +static inline void rb_set_black(struct rb_node *rb) +{ + rb->__rb_parent_color |= RB_BLACK; +} + +static inline struct rb_node *rb_red_parent(struct rb_node *red) +{ + return (struct rb_node *)red->__rb_parent_color; +} + +/* + * Helper function for rotations: + * - old's parent and color get assigned to new + * - old gets assigned new as a parent and 'color' as a color. + */ +static inline void +__rb_rotate_set_parents(struct rb_node *old, struct rb_node *new, + struct rb_root *root, int color) +{ + struct rb_node *parent = rb_parent(old); + new->__rb_parent_color = old->__rb_parent_color; + rb_set_parent_color(old, new, color); + __rb_change_child(old, new, parent, root); +} + +static __always_inline void +__rb_insert(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + struct rb_node *parent = rb_red_parent(node), *gparent, *tmp; + + while (true) { + /* + * Loop invariant: node is red + * + * If there is a black parent, we are done. + * Otherwise, take some corrective action as we don't + * want a red root or two consecutive red nodes. + */ + if (!parent) { + rb_set_parent_color(node, NULL, RB_BLACK); + break; + } else if (rb_is_black(parent)) + break; + + gparent = rb_red_parent(parent); + + tmp = gparent->rb_right; + if (parent != tmp) { /* parent == gparent->rb_left */ + if (tmp && rb_is_red(tmp)) { + /* + * Case 1 - color flips + * + * G g + * / \ / \ + * p u --> P U + * / / + * n n + * + * However, since g's parent might be red, and + * 4) does not allow this, we need to recurse + * at g. + */ + rb_set_parent_color(tmp, gparent, RB_BLACK); + rb_set_parent_color(parent, gparent, RB_BLACK); + node = gparent; + parent = rb_parent(node); + rb_set_parent_color(node, parent, RB_RED); + continue; + } + + tmp = parent->rb_right; + if (node == tmp) { + /* + * Case 2 - left rotate at parent + * + * G G + * / \ / \ + * p U --> n U + * \ / + * n p + * + * This still leaves us in violation of 4), the + * continuation into Case 3 will fix that. + */ + parent->rb_right = tmp = node->rb_left; + node->rb_left = parent; + if (tmp) + rb_set_parent_color(tmp, parent, + RB_BLACK); + rb_set_parent_color(parent, node, RB_RED); + augment_rotate(parent, node); + parent = node; + tmp = node->rb_right; + } + + /* + * Case 3 - right rotate at gparent + * + * G P + * / \ / \ + * p U --> n g + * / \ + * n U + */ + gparent->rb_left = tmp; /* == parent->rb_right */ + parent->rb_right = gparent; + if (tmp) + rb_set_parent_color(tmp, gparent, RB_BLACK); + __rb_rotate_set_parents(gparent, parent, root, RB_RED); + augment_rotate(gparent, parent); + break; + } else { + tmp = gparent->rb_left; + if (tmp && rb_is_red(tmp)) { + /* Case 1 - color flips */ + rb_set_parent_color(tmp, gparent, RB_BLACK); + rb_set_parent_color(parent, gparent, RB_BLACK); + node = gparent; + parent = rb_parent(node); + rb_set_parent_color(node, parent, RB_RED); + continue; + } + + tmp = parent->rb_left; + if (node == tmp) { + /* Case 2 - right rotate at parent */ + parent->rb_left = tmp = node->rb_right; + node->rb_right = parent; + if (tmp) + rb_set_parent_color(tmp, parent, + RB_BLACK); + rb_set_parent_color(parent, node, RB_RED); + augment_rotate(parent, node); + parent = node; + tmp = node->rb_left; + } + + /* Case 3 - left rotate at gparent */ + gparent->rb_right = tmp; /* == parent->rb_left */ + parent->rb_left = gparent; + if (tmp) + rb_set_parent_color(tmp, gparent, RB_BLACK); + __rb_rotate_set_parents(gparent, parent, root, RB_RED); + augment_rotate(gparent, parent); + break; + } + } +} + +/* + * Inline version for rb_erase() use - we want to be able to inline + * and eliminate the dummy_rotate callback there + */ +static __always_inline void +____rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + struct rb_node *node = NULL, *sibling, *tmp1, *tmp2; + + while (true) { + /* + * Loop invariants: + * - node is black (or NULL on first iteration) + * - node is not the root (parent is not NULL) + * - All leaf paths going through parent and node have a + * black node count that is 1 lower than other leaf paths. + */ + sibling = parent->rb_right; + if (node != sibling) { /* node == parent->rb_left */ + if (rb_is_red(sibling)) { + /* + * Case 1 - left rotate at parent + * + * P S + * / \ / \ + * N s --> p Sr + * / \ / \ + * Sl Sr N Sl + */ + parent->rb_right = tmp1 = sibling->rb_left; + sibling->rb_left = parent; + rb_set_parent_color(tmp1, parent, RB_BLACK); + __rb_rotate_set_parents(parent, sibling, root, + RB_RED); + augment_rotate(parent, sibling); + sibling = tmp1; + } + tmp1 = sibling->rb_right; + if (!tmp1 || rb_is_black(tmp1)) { + tmp2 = sibling->rb_left; + if (!tmp2 || rb_is_black(tmp2)) { + /* + * Case 2 - sibling color flip + * (p could be either color here) + * + * (p) (p) + * / \ / \ + * N S --> N s + * / \ / \ + * Sl Sr Sl Sr + * + * This leaves us violating 5) which + * can be fixed by flipping p to black + * if it was red, or by recursing at p. + * p is red when coming from Case 1. + */ + rb_set_parent_color(sibling, parent, + RB_RED); + if (rb_is_red(parent)) + rb_set_black(parent); + else { + node = parent; + parent = rb_parent(node); + if (parent) + continue; + } + break; + } + /* + * Case 3 - right rotate at sibling + * (p could be either color here) + * + * (p) (p) + * / \ / \ + * N S --> N Sl + * / \ \ + * sl Sr s + * \ + * Sr + */ + sibling->rb_left = tmp1 = tmp2->rb_right; + tmp2->rb_right = sibling; + parent->rb_right = tmp2; + if (tmp1) + rb_set_parent_color(tmp1, sibling, + RB_BLACK); + augment_rotate(sibling, tmp2); + tmp1 = sibling; + sibling = tmp2; + } + /* + * Case 4 - left rotate at parent + color flips + * (p and sl could be either color here. + * After rotation, p becomes black, s acquires + * p's color, and sl keeps its color) + * + * (p) (s) + * / \ / \ + * N S --> P Sr + * / \ / \ + * (sl) sr N (sl) + */ + parent->rb_right = tmp2 = sibling->rb_left; + sibling->rb_left = parent; + rb_set_parent_color(tmp1, sibling, RB_BLACK); + if (tmp2) + rb_set_parent(tmp2, parent); + __rb_rotate_set_parents(parent, sibling, root, + RB_BLACK); + augment_rotate(parent, sibling); + break; + } else { + sibling = parent->rb_left; + if (rb_is_red(sibling)) { + /* Case 1 - right rotate at parent */ + parent->rb_left = tmp1 = sibling->rb_right; + sibling->rb_right = parent; + rb_set_parent_color(tmp1, parent, RB_BLACK); + __rb_rotate_set_parents(parent, sibling, root, + RB_RED); + augment_rotate(parent, sibling); + sibling = tmp1; + } + tmp1 = sibling->rb_left; + if (!tmp1 || rb_is_black(tmp1)) { + tmp2 = sibling->rb_right; + if (!tmp2 || rb_is_black(tmp2)) { + /* Case 2 - sibling color flip */ + rb_set_parent_color(sibling, parent, + RB_RED); + if (rb_is_red(parent)) + rb_set_black(parent); + else { + node = parent; + parent = rb_parent(node); + if (parent) + continue; + } + break; + } + /* Case 3 - right rotate at sibling */ + sibling->rb_right = tmp1 = tmp2->rb_left; + tmp2->rb_left = sibling; + parent->rb_left = tmp2; + if (tmp1) + rb_set_parent_color(tmp1, sibling, + RB_BLACK); + augment_rotate(sibling, tmp2); + tmp1 = sibling; + sibling = tmp2; + } + /* Case 4 - left rotate at parent + color flips */ + parent->rb_left = tmp2 = sibling->rb_right; + sibling->rb_right = parent; + rb_set_parent_color(tmp1, sibling, RB_BLACK); + if (tmp2) + rb_set_parent(tmp2, parent); + __rb_rotate_set_parents(parent, sibling, root, + RB_BLACK); + augment_rotate(parent, sibling); + break; + } + } +} + +/* Non-inline version for rb_erase_augmented() use */ +void __rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + ____rb_erase_color(parent, root, augment_rotate); +} + +/* + * Non-augmented rbtree manipulation functions. + * + * We use dummy augmented callbacks here, and have the compiler optimize them + * out of the rb_insert_color() and rb_erase() function definitions. + */ + +static inline void dummy_propagate(struct rb_node *node, struct rb_node *stop) {} +static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {} +static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {} + +static const struct rb_augment_callbacks dummy_callbacks = { + dummy_propagate, dummy_copy, dummy_rotate +}; + +void rb_insert_color(struct rb_node *node, struct rb_root *root) +{ + __rb_insert(node, root, dummy_rotate); +} + +void rb_erase(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *rebalance; + rebalance = __rb_erase_augmented(node, root, &dummy_callbacks); + if (rebalance) + ____rb_erase_color(rebalance, root, dummy_rotate); +} + +/* + * Augmented rbtree manipulation functions. + * + * This instantiates the same __always_inline functions as in the non-augmented + * case, but this time with user-defined callbacks. + */ + +void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + __rb_insert(node, root, augment_rotate); +} + +/* + * This function returns the first node (in sort order) of the tree. + */ +struct rb_node *rb_first(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_left) + n = n->rb_left; + return n; +} + +struct rb_node *rb_last(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_right) + n = n->rb_right; + return n; +} + +struct rb_node *rb_next(const struct rb_node *node) +{ + struct rb_node *parent; + + if (RB_EMPTY_NODE(node)) + return NULL; + + /* + * If we have a right-hand child, go down and then left as far + * as we can. + */ + if (node->rb_right) { + node = node->rb_right; + while (node->rb_left) + node=node->rb_left; + return (struct rb_node *)node; + } + + /* + * No right-hand children. Everything down and left is smaller than us, + * so any 'next' node must be in the general direction of our parent. + * Go up the tree; any time the ancestor is a right-hand child of its + * parent, keep going up. First time it's a left-hand child of its + * parent, said parent is our 'next' node. + */ + while ((parent = rb_parent(node)) && node == parent->rb_right) + node = parent; + + return parent; +} + +struct rb_node *rb_prev(const struct rb_node *node) +{ + struct rb_node *parent; + + if (RB_EMPTY_NODE(node)) + return NULL; + + /* + * If we have a left-hand child, go down and then right as far + * as we can. + */ + if (node->rb_left) { + node = node->rb_left; + while (node->rb_right) + node=node->rb_right; + return (struct rb_node *)node; + } + + /* + * No left-hand children. Go up till we find an ancestor which + * is a right-hand child of its parent. + */ + while ((parent = rb_parent(node)) && node == parent->rb_left) + node = parent; + + return parent; +} + +void rb_replace_node(struct rb_node *victim, struct rb_node *new, + struct rb_root *root) +{ + struct rb_node *parent = rb_parent(victim); + + /* Set the surrounding nodes to point to the replacement */ + __rb_change_child(victim, new, parent, root); + if (victim->rb_left) + rb_set_parent(victim->rb_left, new); + if (victim->rb_right) + rb_set_parent(victim->rb_right, new); + + /* Copy the pointers/colour from the victim to the replacement */ + *new = *victim; +} + +static struct rb_node *rb_left_deepest_node(const struct rb_node *node) +{ + for (;;) { + if (node->rb_left) + node = node->rb_left; + else if (node->rb_right) + node = node->rb_right; + else + return (struct rb_node *)node; + } +} + +struct rb_node *rb_next_postorder(const struct rb_node *node) +{ + const struct rb_node *parent; + if (!node) + return NULL; + parent = rb_parent(node); + + /* If we're sitting on node, we've already seen our children */ + if (parent && node == parent->rb_left && parent->rb_right) { + /* If we are the parent's left node, go to the parent's right + * node then all the way down to the left */ + return rb_left_deepest_node(parent->rb_right); + } else + /* Otherwise we are the parent's right node, and the parent + * should be next */ + return (struct rb_node *)parent; +} + +struct rb_node *rb_first_postorder(const struct rb_root *root) +{ + if (!root->rb_node) + return NULL; + + return rb_left_deepest_node(root->rb_node); +} diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 6504b727f450..ff667e36ca52 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -18,6 +18,7 @@ tools/arch/x86/include/asm/atomic.h tools/arch/x86/include/asm/rmwcc.h tools/lib/traceevent tools/lib/api +tools/lib/rbtree.c tools/lib/symbol/kallsyms.c tools/lib/symbol/kallsyms.h tools/lib/util/find_next_bit.c @@ -55,7 +56,6 @@ include/linux/list.h include/linux/hash.h include/linux/stringify.h lib/hweight.c -lib/rbtree.c include/linux/swab.h arch/*/include/asm/unistd*.h arch/*/include/uapi/asm/unistd*.h diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 586a59d46022..601d11440596 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -139,7 +139,7 @@ $(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) -$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c FORCE +$(OUTPUT)util/rbtree.o: ../lib/rbtree.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) -- cgit From 03da23a34a039a74f550646e68d562655306e331 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 5 Jul 2015 22:54:01 -0300 Subject: tools: Move rbtree.h from tools/perf/ The previous step, copying the contents minus the rcupdate.h parts, was done as a minimal fix, now do the move from tools/perf/. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-52fllxtsgmtke66pmv98mcma@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/rbtree.h | 104 +++++++++++++++++++++++++++++++++ tools/perf/MANIFEST | 1 + tools/perf/util/include/linux/rbtree.h | 104 --------------------------------- 3 files changed, 105 insertions(+), 104 deletions(-) create mode 100644 tools/include/linux/rbtree.h delete mode 100644 tools/perf/util/include/linux/rbtree.h diff --git a/tools/include/linux/rbtree.h b/tools/include/linux/rbtree.h new file mode 100644 index 000000000000..112582253dd0 --- /dev/null +++ b/tools/include/linux/rbtree.h @@ -0,0 +1,104 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/include/linux/rbtree.h + + To use rbtrees you'll have to implement your own insert and search cores. + This will avoid us to use callbacks and to drop drammatically performances. + I know it's not the cleaner way, but in C (not in C++) to get + performances and genericity... + + See Documentation/rbtree.txt for documentation and samples. +*/ + +#ifndef __TOOLS_LINUX_PERF_RBTREE_H +#define __TOOLS_LINUX_PERF_RBTREE_H + +#include +#include + +struct rb_node { + unsigned long __rb_parent_color; + struct rb_node *rb_right; + struct rb_node *rb_left; +} __attribute__((aligned(sizeof(long)))); + /* The alignment might seem pointless, but allegedly CRIS needs it */ + +struct rb_root { + struct rb_node *rb_node; +}; + + +#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) + +#define RB_ROOT (struct rb_root) { NULL, } +#define rb_entry(ptr, type, member) container_of(ptr, type, member) + +#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) + +/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */ +#define RB_EMPTY_NODE(node) \ + ((node)->__rb_parent_color == (unsigned long)(node)) +#define RB_CLEAR_NODE(node) \ + ((node)->__rb_parent_color = (unsigned long)(node)) + + +extern void rb_insert_color(struct rb_node *, struct rb_root *); +extern void rb_erase(struct rb_node *, struct rb_root *); + + +/* Find logical next and previous nodes in a tree */ +extern struct rb_node *rb_next(const struct rb_node *); +extern struct rb_node *rb_prev(const struct rb_node *); +extern struct rb_node *rb_first(const struct rb_root *); +extern struct rb_node *rb_last(const struct rb_root *); + +/* Postorder iteration - always visit the parent after its children */ +extern struct rb_node *rb_first_postorder(const struct rb_root *); +extern struct rb_node *rb_next_postorder(const struct rb_node *); + +/* Fast replacement of a single node without remove/rebalance/add/rebalance */ +extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, + struct rb_root *root); + +static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, + struct rb_node **rb_link) +{ + node->__rb_parent_color = (unsigned long)parent; + node->rb_left = node->rb_right = NULL; + + *rb_link = node; +} + +#define rb_entry_safe(ptr, type, member) \ + ({ typeof(ptr) ____ptr = (ptr); \ + ____ptr ? rb_entry(____ptr, type, member) : NULL; \ + }) + + +/* + * Handy for checking that we are not deleting an entry that is + * already in a list, found in block/{blk-throttle,cfq-iosched}.c, + * probably should be moved to lib/rbtree.c... + */ +static inline void rb_erase_init(struct rb_node *n, struct rb_root *root) +{ + rb_erase(n, root); + RB_CLEAR_NODE(n); +} +#endif /* __TOOLS_LINUX_PERF_RBTREE_H */ diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index ff667e36ca52..9df268167b8c 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -45,6 +45,7 @@ tools/include/linux/kernel.h tools/include/linux/list.h tools/include/linux/log2.h tools/include/linux/poison.h +tools/include/linux/rbtree.h tools/include/linux/types.h include/asm-generic/bitops/arch_hweight.h include/asm-generic/bitops/const_hweight.h diff --git a/tools/perf/util/include/linux/rbtree.h b/tools/perf/util/include/linux/rbtree.h deleted file mode 100644 index 112582253dd0..000000000000 --- a/tools/perf/util/include/linux/rbtree.h +++ /dev/null @@ -1,104 +0,0 @@ -/* - Red Black Trees - (C) 1999 Andrea Arcangeli - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - - linux/include/linux/rbtree.h - - To use rbtrees you'll have to implement your own insert and search cores. - This will avoid us to use callbacks and to drop drammatically performances. - I know it's not the cleaner way, but in C (not in C++) to get - performances and genericity... - - See Documentation/rbtree.txt for documentation and samples. -*/ - -#ifndef __TOOLS_LINUX_PERF_RBTREE_H -#define __TOOLS_LINUX_PERF_RBTREE_H - -#include -#include - -struct rb_node { - unsigned long __rb_parent_color; - struct rb_node *rb_right; - struct rb_node *rb_left; -} __attribute__((aligned(sizeof(long)))); - /* The alignment might seem pointless, but allegedly CRIS needs it */ - -struct rb_root { - struct rb_node *rb_node; -}; - - -#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) - -#define RB_ROOT (struct rb_root) { NULL, } -#define rb_entry(ptr, type, member) container_of(ptr, type, member) - -#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) - -/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */ -#define RB_EMPTY_NODE(node) \ - ((node)->__rb_parent_color == (unsigned long)(node)) -#define RB_CLEAR_NODE(node) \ - ((node)->__rb_parent_color = (unsigned long)(node)) - - -extern void rb_insert_color(struct rb_node *, struct rb_root *); -extern void rb_erase(struct rb_node *, struct rb_root *); - - -/* Find logical next and previous nodes in a tree */ -extern struct rb_node *rb_next(const struct rb_node *); -extern struct rb_node *rb_prev(const struct rb_node *); -extern struct rb_node *rb_first(const struct rb_root *); -extern struct rb_node *rb_last(const struct rb_root *); - -/* Postorder iteration - always visit the parent after its children */ -extern struct rb_node *rb_first_postorder(const struct rb_root *); -extern struct rb_node *rb_next_postorder(const struct rb_node *); - -/* Fast replacement of a single node without remove/rebalance/add/rebalance */ -extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, - struct rb_root *root); - -static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, - struct rb_node **rb_link) -{ - node->__rb_parent_color = (unsigned long)parent; - node->rb_left = node->rb_right = NULL; - - *rb_link = node; -} - -#define rb_entry_safe(ptr, type, member) \ - ({ typeof(ptr) ____ptr = (ptr); \ - ____ptr ? rb_entry(____ptr, type, member) : NULL; \ - }) - - -/* - * Handy for checking that we are not deleting an entry that is - * already in a list, found in block/{blk-throttle,cfq-iosched}.c, - * probably should be moved to lib/rbtree.c... - */ -static inline void rb_erase_init(struct rb_node *n, struct rb_root *root) -{ - rb_erase(n, root); - RB_CLEAR_NODE(n); -} -#endif /* __TOOLS_LINUX_PERF_RBTREE_H */ -- cgit From 307bc971959aaa2df44032e7f6b0bda1f7e26890 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 5 Jul 2015 22:59:05 -0300 Subject: tools: Copy rbtree_augmented.h from the kernel To complete the transitioning to not to share the same files with the kernel, also moving it from tools/perf/include/linux/ to tools/include/linux to make the whoke rbtree kit to other tools/ living codebases. Cc: Adrian Hunter Cc: Borislav Petkov Cc: David Ahern Cc: Frederic Weisbecker Cc: Jiri Olsa Cc: Namhyung Kim Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-5bxyehixafckqm6ez25alnfo@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/rbtree_augmented.h | 245 +++++++++++++++++++++++ tools/perf/MANIFEST | 2 +- tools/perf/util/include/linux/rbtree_augmented.h | 2 - 3 files changed, 246 insertions(+), 3 deletions(-) create mode 100644 tools/include/linux/rbtree_augmented.h delete mode 100644 tools/perf/util/include/linux/rbtree_augmented.h diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h new file mode 100644 index 000000000000..43be941db695 --- /dev/null +++ b/tools/include/linux/rbtree_augmented.h @@ -0,0 +1,245 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli + (C) 2002 David Woodhouse + (C) 2012 Michel Lespinasse + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + tools/linux/include/linux/rbtree_augmented.h + + Copied from: + linux/include/linux/rbtree_augmented.h +*/ + +#ifndef _TOOLS_LINUX_RBTREE_AUGMENTED_H +#define _TOOLS_LINUX_RBTREE_AUGMENTED_H + +#include +#include + +/* + * Please note - only struct rb_augment_callbacks and the prototypes for + * rb_insert_augmented() and rb_erase_augmented() are intended to be public. + * The rest are implementation details you are not expected to depend on. + * + * See Documentation/rbtree.txt for documentation and samples. + */ + +struct rb_augment_callbacks { + void (*propagate)(struct rb_node *node, struct rb_node *stop); + void (*copy)(struct rb_node *old, struct rb_node *new); + void (*rotate)(struct rb_node *old, struct rb_node *new); +}; + +extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); +/* + * Fixup the rbtree and update the augmented information when rebalancing. + * + * On insertion, the user must update the augmented information on the path + * leading to the inserted node, then call rb_link_node() as usual and + * rb_augment_inserted() instead of the usual rb_insert_color() call. + * If rb_augment_inserted() rebalances the rbtree, it will callback into + * a user provided function to update the augmented information on the + * affected subtrees. + */ +static inline void +rb_insert_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + __rb_insert_augmented(node, root, augment->rotate); +} + +#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield, \ + rbtype, rbaugmented, rbcompute) \ +static inline void \ +rbname ## _propagate(struct rb_node *rb, struct rb_node *stop) \ +{ \ + while (rb != stop) { \ + rbstruct *node = rb_entry(rb, rbstruct, rbfield); \ + rbtype augmented = rbcompute(node); \ + if (node->rbaugmented == augmented) \ + break; \ + node->rbaugmented = augmented; \ + rb = rb_parent(&node->rbfield); \ + } \ +} \ +static inline void \ +rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new) \ +{ \ + rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \ + rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \ + new->rbaugmented = old->rbaugmented; \ +} \ +static void \ +rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \ +{ \ + rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \ + rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \ + new->rbaugmented = old->rbaugmented; \ + old->rbaugmented = rbcompute(old); \ +} \ +rbstatic const struct rb_augment_callbacks rbname = { \ + rbname ## _propagate, rbname ## _copy, rbname ## _rotate \ +}; + + +#define RB_RED 0 +#define RB_BLACK 1 + +#define __rb_parent(pc) ((struct rb_node *)(pc & ~3)) + +#define __rb_color(pc) ((pc) & 1) +#define __rb_is_black(pc) __rb_color(pc) +#define __rb_is_red(pc) (!__rb_color(pc)) +#define rb_color(rb) __rb_color((rb)->__rb_parent_color) +#define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color) +#define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color) + +static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) +{ + rb->__rb_parent_color = rb_color(rb) | (unsigned long)p; +} + +static inline void rb_set_parent_color(struct rb_node *rb, + struct rb_node *p, int color) +{ + rb->__rb_parent_color = (unsigned long)p | color; +} + +static inline void +__rb_change_child(struct rb_node *old, struct rb_node *new, + struct rb_node *parent, struct rb_root *root) +{ + if (parent) { + if (parent->rb_left == old) + parent->rb_left = new; + else + parent->rb_right = new; + } else + root->rb_node = new; +} + +extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); + +static __always_inline struct rb_node * +__rb_erase_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *child = node->rb_right, *tmp = node->rb_left; + struct rb_node *parent, *rebalance; + unsigned long pc; + + if (!tmp) { + /* + * Case 1: node to erase has no more than 1 child (easy!) + * + * Note that if there is one child it must be red due to 5) + * and node must be black due to 4). We adjust colors locally + * so as to bypass __rb_erase_color() later on. + */ + pc = node->__rb_parent_color; + parent = __rb_parent(pc); + __rb_change_child(node, child, parent, root); + if (child) { + child->__rb_parent_color = pc; + rebalance = NULL; + } else + rebalance = __rb_is_black(pc) ? parent : NULL; + tmp = parent; + } else if (!child) { + /* Still case 1, but this time the child is node->rb_left */ + tmp->__rb_parent_color = pc = node->__rb_parent_color; + parent = __rb_parent(pc); + __rb_change_child(node, tmp, parent, root); + rebalance = NULL; + tmp = parent; + } else { + struct rb_node *successor = child, *child2; + tmp = child->rb_left; + if (!tmp) { + /* + * Case 2: node's successor is its right child + * + * (n) (s) + * / \ / \ + * (x) (s) -> (x) (c) + * \ + * (c) + */ + parent = successor; + child2 = successor->rb_right; + augment->copy(node, successor); + } else { + /* + * Case 3: node's successor is leftmost under + * node's right child subtree + * + * (n) (s) + * / \ / \ + * (x) (y) -> (x) (y) + * / / + * (p) (p) + * / / + * (s) (c) + * \ + * (c) + */ + do { + parent = successor; + successor = tmp; + tmp = tmp->rb_left; + } while (tmp); + parent->rb_left = child2 = successor->rb_right; + successor->rb_right = child; + rb_set_parent(child, successor); + augment->copy(node, successor); + augment->propagate(parent, successor); + } + + successor->rb_left = tmp = node->rb_left; + rb_set_parent(tmp, successor); + + pc = node->__rb_parent_color; + tmp = __rb_parent(pc); + __rb_change_child(node, successor, tmp, root); + if (child2) { + successor->__rb_parent_color = pc; + rb_set_parent_color(child2, parent, RB_BLACK); + rebalance = NULL; + } else { + unsigned long pc2 = successor->__rb_parent_color; + successor->__rb_parent_color = pc; + rebalance = __rb_is_black(pc2) ? parent : NULL; + } + tmp = successor; + } + + augment->propagate(tmp, NULL); + return rebalance; +} + +static __always_inline void +rb_erase_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *rebalance = __rb_erase_augmented(node, root, augment); + if (rebalance) + __rb_erase_color(rebalance, root, augment->rotate); +} + +#endif /* _TOOLS_LINUX_RBTREE_AUGMENTED_H */ diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index 9df268167b8c..09dc0aabb515 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -46,6 +46,7 @@ tools/include/linux/list.h tools/include/linux/log2.h tools/include/linux/poison.h tools/include/linux/rbtree.h +tools/include/linux/rbtree_augmented.h tools/include/linux/types.h include/asm-generic/bitops/arch_hweight.h include/asm-generic/bitops/const_hweight.h @@ -65,7 +66,6 @@ arch/*/lib/memcpy*.S arch/*/lib/memset*.S include/linux/poison.h include/linux/hw_breakpoint.h -include/linux/rbtree_augmented.h include/uapi/linux/perf_event.h include/uapi/linux/const.h include/uapi/linux/swab.h diff --git a/tools/perf/util/include/linux/rbtree_augmented.h b/tools/perf/util/include/linux/rbtree_augmented.h deleted file mode 100644 index 9d6fcdf1788b..000000000000 --- a/tools/perf/util/include/linux/rbtree_augmented.h +++ /dev/null @@ -1,2 +0,0 @@ -#include -#include "../../../../include/linux/rbtree_augmented.h" -- cgit From d8ea782b56d9d2c46a47b3231cfd16ecfb538c60 Mon Sep 17 00:00:00 2001 From: Vaidyanathan Srinivasan Date: Mon, 29 Jun 2015 10:47:55 +0530 Subject: powerpc/powernv: Fix vma page prot flags in opal-prd driver opal-prd driver will mmap() firmware code/data area as private mapping to prd user space daemon. Write to this page will trigger COW faults. The new COW pages are normal kernel RAM pages accounted by the kernel and are not special. vma->vm_page_prot value will be used at page fault time for the new COW pages, while pgprot_t value passed in remap_pfn_range() is used for the initial page table entry. Hence: * Do not add _PAGE_SPECIAL in vma, but only for remap_pfn_range() * Also remap_pfn_range() will add the _PAGE_SPECIAL flag using pte_mkspecial() call, hence no need to specify in the driver This fix resolves the page accounting warning shown below: BUG: Bad rss-counter state mm:c0000007d34ac600 idx:1 val:19 The above warning is triggered since _PAGE_SPECIAL was incorrectly being set for the normal kernel COW pages. Signed-off-by: Vaidyanathan Srinivasan Acked-by: Jeremy Kerr Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-prd.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index 46cb3feb0a13..4ece8e40dd54 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -112,6 +112,7 @@ static int opal_prd_open(struct inode *inode, struct file *file) static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma) { size_t addr, size; + pgprot_t page_prot; int rc; pr_devel("opal_prd_mmap(0x%016lx, 0x%016lx, 0x%lx, 0x%lx)\n", @@ -125,13 +126,11 @@ static int opal_prd_mmap(struct file *file, struct vm_area_struct *vma) if (!opal_prd_range_is_valid(addr, size)) return -EINVAL; - vma->vm_page_prot = __pgprot(pgprot_val(phys_mem_access_prot(file, - vma->vm_pgoff, - size, vma->vm_page_prot)) - | _PAGE_SPECIAL); + page_prot = phys_mem_access_prot(file, vma->vm_pgoff, + size, vma->vm_page_prot); rc = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size, - vma->vm_page_prot); + page_prot); return rc; } -- cgit From 5aac644a9944bea93b4f05ced1883a902a2535f6 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 3 Jun 2015 10:39:46 +0300 Subject: x86/tsc: Let high latency PIT fail fast in quick_pit_calibrate() If it takes longer than 12us to read the PIT counter lsb/msb, then the error margin will never fall below 500ppm within 50ms, and Fast TSC calibration will always fail. This patch detects when that will happen and fails fast. Note the failure message is not printed in that case because: 1. it will always happen on that class of hardware 2. the absence of the message is more informative than its presence Signed-off-by: Adrian Hunter Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Len Brown Cc: Andi Kleen Link: http://lkml.kernel.org/r/556EB717.9070607@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 505449700e0c..7437b41f6a47 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -598,10 +598,19 @@ static unsigned long quick_pit_calibrate(void) if (!pit_expect_msb(0xff-i, &delta, &d2)) break; + delta -= tsc; + + /* + * Extrapolate the error and fail fast if the error will + * never be below 500 ppm. + */ + if (i == 1 && + d1 + d2 >= (delta * MAX_QUICK_PIT_ITERATIONS) >> 11) + return 0; + /* * Iterate until the error is less than 500 ppm */ - delta -= tsc; if (d1+d2 >= delta >> 11) continue; -- cgit From 0d7b6b1182ef6f72be592688c8a22025a5b7b483 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Thu, 2 Jul 2015 14:29:58 +0300 Subject: drm/i915/chv: fix HW readout of the port PLL fractional divider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ville noticed that the PLL HW readout code parsed the fractional divider value as if the fractional divider was always enabled. This may result in a port clock state check mismatch if the preceeding modeset disabled the fractional divider, but left a non-zero divider value in the register. Signed-off-by: Imre Deak Reviewed-by: Ville Syrjälä Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1b61f9810387..8aa803848f35 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -7887,7 +7887,7 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc, int pipe = pipe_config->cpu_transcoder; enum dpio_channel port = vlv_pipe_to_channel(pipe); intel_clock_t clock; - u32 cmn_dw13, pll_dw0, pll_dw1, pll_dw2; + u32 cmn_dw13, pll_dw0, pll_dw1, pll_dw2, pll_dw3; int refclk = 100000; mutex_lock(&dev_priv->sb_lock); @@ -7895,10 +7895,13 @@ static void chv_crtc_clock_get(struct intel_crtc *crtc, pll_dw0 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW0(port)); pll_dw1 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW1(port)); pll_dw2 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW2(port)); + pll_dw3 = vlv_dpio_read(dev_priv, pipe, CHV_PLL_DW3(port)); mutex_unlock(&dev_priv->sb_lock); clock.m1 = (pll_dw1 & 0x7) == DPIO_CHV_M1_DIV_BY_2 ? 2 : 0; - clock.m2 = ((pll_dw0 & 0xff) << 22) | (pll_dw2 & 0x3fffff); + clock.m2 = (pll_dw0 & 0xff) << 22; + if (pll_dw3 & DPIO_CHV_FRAC_DIV_EN) + clock.m2 |= pll_dw2 & 0x3fffff; clock.n = (pll_dw1 >> DPIO_CHV_N_DIV_SHIFT) & 0xf; clock.p1 = (cmn_dw13 >> DPIO_CHV_P1_DIV_SHIFT) & 0x7; clock.p2 = (cmn_dw13 >> DPIO_CHV_P2_DIV_SHIFT) & 0x1f; -- cgit From 14f21189df33bc972455d6a0ed875aa68718d7fc Mon Sep 17 00:00:00 2001 From: Maninder Singh Date: Mon, 29 Jun 2015 16:05:11 +0530 Subject: cxl/vphb.c: Use phb pointer after NULL check static Anlaysis detected below error:- (error) Possible null pointer dereference: phb So, Use phb after NULL check. Signed-off-by: Maninder Singh Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/vphb.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c index b1d1983a84a5..2eba002b580b 100644 --- a/drivers/misc/cxl/vphb.c +++ b/drivers/misc/cxl/vphb.c @@ -112,9 +112,10 @@ static int cxl_pcie_config_info(struct pci_bus *bus, unsigned int devfn, unsigned long addr; phb = pci_bus_to_host(bus); - afu = (struct cxl_afu *)phb->private_data; if (phb == NULL) return PCIBIOS_DEVICE_NOT_FOUND; + afu = (struct cxl_afu *)phb->private_data; + if (cxl_pcie_cfg_record(bus->number, devfn) > afu->crs_num) return PCIBIOS_DEVICE_NOT_FOUND; if (offset >= (unsigned long)phb->cfg_data) -- cgit From 27ea2c420cad57386c25668cb9a9f0f082635586 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Mon, 15 Jun 2015 13:25:19 +1000 Subject: powerpc: Set the correct kernel taint on machine check errors. This means the 'M' flag will work properly when the kernel prints a backtrace. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/traps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 6530f1b8874d..37de90f8a845 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -297,6 +297,8 @@ long machine_check_early(struct pt_regs *regs) __this_cpu_inc(irq_stat.mce_exceptions); + add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); + if (cur_cpu_spec && cur_cpu_spec->machine_check_early) handled = cur_cpu_spec->machine_check_early(regs); return handled; -- cgit From 8c00d5c9d3e7452658dda55024485d52919ebfdd Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Thu, 2 Jul 2015 15:55:21 +1000 Subject: cxl: Test the correct mmio space before unmapping Before freeing p2n, test p2n, not p1n. Signed-off-by: Daniel Axtens Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index c68ef5806dbe..32ad09705949 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -539,7 +539,7 @@ err: static void cxl_unmap_slice_regs(struct cxl_afu *afu) { - if (afu->p1n_mmio) + if (afu->p2n_mmio) iounmap(afu->p2n_mmio); if (afu->p1n_mmio) iounmap(afu->p1n_mmio); -- cgit From eab861a7a52208868637f47a92caa926ddadd9c7 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 2 Jul 2015 14:56:20 +1000 Subject: powerpc: Add plain English description for alignment exception oopses If we take an alignment exception which we cannot fix, the oops currently prints: Unable to handle kernel paging request for unknown fault Lets print something more useful: Unable to handle kernel paging request for unaligned access at address 0xc0000000f77bba8f Signed-off-by: Anton Blanchard Signed-off-by: Michael Ellerman --- arch/powerpc/mm/fault.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 6d535973b200..a67c6d781c52 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -529,6 +529,10 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) printk(KERN_ALERT "Unable to handle kernel paging request for " "instruction fetch\n"); break; + case 0x600: + printk(KERN_ALERT "Unable to handle kernel paging request for " + "unaligned access at address 0x%08lx\n", regs->dar); + break; default: printk(KERN_ALERT "Unable to handle kernel paging request for " "unknown fault\n"); -- cgit From aaf6fd5c75eb4aa734ec2062deab371633c3655f Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Mon, 6 Jul 2015 09:40:34 +1000 Subject: powerpc/ppc4xx_hsta_msi: Include ppc-pci.h to fix reference to hose_list An earlier commit referenced 'hose_list' in sysdev/ppc4xx_hsta_msi.c. hose_list is defined in ppc-pci.h, which was not included in that file. Include it, fixing the build for the akebono defconfig used by the kbuild test robot. Fixes: f2c800aaceb6 ("powerpc/ppc4xx_hsta_msi: Move MSI-related ops to pci_controller_ops") Reported-by: kbuild test robot Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/ppc4xx_hsta_msi.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c index 2bc33674ebfc..87f9623ca805 100644 --- a/arch/powerpc/sysdev/ppc4xx_hsta_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_hsta_msi.c @@ -18,6 +18,7 @@ #include #include #include +#include struct ppc4xx_hsta_msi { struct device *dev; -- cgit From a8956a7b7232da5f4ce4a305c72a54cc5e4a8307 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Fri, 3 Jul 2015 17:39:12 +1000 Subject: powerpc/powernv: Fix opal-elog interrupt handler The conversion of opal events to a proper irqchip means that handlers are called until the relevant opal event has been cleared by processing it. Events that queue work should therefore use a threaded handler to mask the event until processing is complete. Signed-off-by: Alistair Popple Tested-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-elog.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 4949ef0d9400..37f959bf392e 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -237,7 +237,7 @@ static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) return elog; } -static void elog_work_fn(struct work_struct *work) +static irqreturn_t elog_event(int irq, void *data) { __be64 size; __be64 id; @@ -251,7 +251,7 @@ static void elog_work_fn(struct work_struct *work) rc = opal_get_elog_size(&id, &size, &type); if (rc != OPAL_SUCCESS) { pr_err("ELOG: OPAL log info read failed\n"); - return; + return IRQ_HANDLED; } elog_size = be64_to_cpu(size); @@ -270,16 +270,10 @@ static void elog_work_fn(struct work_struct *work) * entries. */ if (kset_find_obj(elog_kset, name)) - return; + return IRQ_HANDLED; create_elog_obj(log_id, elog_size, elog_type); -} - -static DECLARE_WORK(elog_work, elog_work_fn); -static irqreturn_t elog_event(int irq, void *data) -{ - schedule_work(&elog_work); return IRQ_HANDLED; } @@ -304,8 +298,8 @@ int __init opal_elog_init(void) return irq; } - rc = request_irq(irq, elog_event, - IRQ_TYPE_LEVEL_HIGH, "opal-elog", NULL); + rc = request_threaded_irq(irq, NULL, elog_event, + IRQF_TRIGGER_HIGH | IRQF_ONESHOT, "opal-elog", NULL); if (rc) { pr_err("%s: Can't request OPAL event irq (%d)\n", __func__, rc); -- cgit From 57ffc5ca679f499f4704fd9b6a372916f59930ee Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Jun 2015 12:32:49 +0200 Subject: perf: Fix AUX buffer refcounting Its currently possible to drop the last refcount to the aux buffer from NMI context, which results in the expected fireworks. The refcounting needs a bigger overhaul, but to cure the immediate problem, delay the freeing by using an irq_work. Reviewed-and-tested-by: Alexander Shishkin Reported-by: Vince Weaver Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150618103249.GK19282@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/events/core.c | 8 -------- kernel/events/internal.h | 10 ++++++++++ kernel/events/ring_buffer.c | 27 +++++++++++++++++++++++++-- 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index e965cfae4207..d3dae3419b99 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -4358,14 +4358,6 @@ static void ring_buffer_wakeup(struct perf_event *event) rcu_read_unlock(); } -static void rb_free_rcu(struct rcu_head *rcu_head) -{ - struct ring_buffer *rb; - - rb = container_of(rcu_head, struct ring_buffer, rcu_head); - rb_free(rb); -} - struct ring_buffer *ring_buffer_get(struct perf_event *event) { struct ring_buffer *rb; diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 2deb24c7a40d..2bbad9c1274c 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -11,6 +11,7 @@ struct ring_buffer { atomic_t refcount; struct rcu_head rcu_head; + struct irq_work irq_work; #ifdef CONFIG_PERF_USE_VMALLOC struct work_struct work; int page_order; /* allocation order */ @@ -55,6 +56,15 @@ struct ring_buffer { }; extern void rb_free(struct ring_buffer *rb); + +static inline void rb_free_rcu(struct rcu_head *rcu_head) +{ + struct ring_buffer *rb; + + rb = container_of(rcu_head, struct ring_buffer, rcu_head); + rb_free(rb); +} + extern struct ring_buffer * rb_alloc(int nr_pages, long watermark, int cpu, int flags); extern void perf_event_wakeup(struct perf_event *event); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 96472824a752..b2be01b1aa9d 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -221,6 +221,8 @@ void perf_output_end(struct perf_output_handle *handle) rcu_read_unlock(); } +static void rb_irq_work(struct irq_work *work); + static void ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) { @@ -241,6 +243,16 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags) INIT_LIST_HEAD(&rb->event_list); spin_lock_init(&rb->event_lock); + init_irq_work(&rb->irq_work, rb_irq_work); +} + +static void ring_buffer_put_async(struct ring_buffer *rb) +{ + if (!atomic_dec_and_test(&rb->refcount)) + return; + + rb->rcu_head.next = (void *)rb; + irq_work_queue(&rb->irq_work); } /* @@ -319,7 +331,7 @@ err_put: rb_free_aux(rb); err: - ring_buffer_put(rb); + ring_buffer_put_async(rb); handle->event = NULL; return NULL; @@ -370,7 +382,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, local_set(&rb->aux_nest, 0); rb_free_aux(rb); - ring_buffer_put(rb); + ring_buffer_put_async(rb); } /* @@ -557,7 +569,18 @@ static void __rb_free_aux(struct ring_buffer *rb) void rb_free_aux(struct ring_buffer *rb) { if (atomic_dec_and_test(&rb->aux_refcount)) + irq_work_queue(&rb->irq_work); +} + +static void rb_irq_work(struct irq_work *work) +{ + struct ring_buffer *rb = container_of(work, struct ring_buffer, irq_work); + + if (!atomic_read(&rb->aux_refcount)) __rb_free_aux(rb); + + if (rb->rcu_head.next == (void *)rb) + call_rcu(&rb->rcu_head, rb_free_rcu); } #ifndef CONFIG_PERF_USE_VMALLOC -- cgit From ebf2d2689de551d90965090bb991fc640a0c0d41 Mon Sep 17 00:00:00 2001 From: Yann Droneaud Date: Mon, 22 Jun 2015 21:38:43 +0200 Subject: perf/x86: Fix copy_from_user_nmi() return if range is not ok Commit 0a196848ca36 ("perf: Fix arch_perf_out_copy_user default"), changes copy_from_user_nmi() to return the number of remaining bytes so that it behave like copy_from_user(). Unfortunately, when the range is outside of the process memory, the return value is still the number of byte copied, eg. 0, instead of the remaining bytes. As all users of copy_from_user_nmi() were modified as part of commit 0a196848ca36, the function should be fixed to return the total number of bytes if range is not correct. Signed-off-by: Yann Droneaud Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435001923-30986-1-git-send-email-ydroneaud@opteya.com Signed-off-by: Ingo Molnar --- arch/x86/lib/usercopy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index ddf9ecb53cc3..e342586db6e4 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -20,7 +20,7 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) unsigned long ret; if (__range_not_ok(from, n, TASK_SIZE)) - return 0; + return n; /* * Even though this function is typically called from NMI/IRQ context -- cgit From 5c250adb51ca318b67ac3a230a565dbe2ea9f0ef Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 11 Jun 2015 16:18:38 +0200 Subject: Revert "ARM: dts: am335x-boneblack: disable RTC-only sleep" This reverts commit 3d76be5b933e2a66d85a2f7444e68e99e8a48ad4. The latest revision of Beaglebone Black does not support RTC-only mode. To avoid potential hardware damage, RTC-only mode was disabled by default by commit 7a6cb0abe1aa ("ARM: dts: am335x-boneblack: disable RTC-only sleep to avoid hardware damage"). Unfortunately, an incorrect fix had already been applied, which instead of just disabling RTC-only mode, prevents the Beaglebone from powering down at all. Revert this patch to fix the power-off regression. Signed-off-by: Johan Hovold Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-boneblack.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/am335x-boneblack.dts b/arch/arm/boot/dts/am335x-boneblack.dts index 901739fcb85a..5c42d259fa68 100644 --- a/arch/arm/boot/dts/am335x-boneblack.dts +++ b/arch/arm/boot/dts/am335x-boneblack.dts @@ -80,3 +80,7 @@ status = "okay"; }; }; + +&rtc { + system-power-controller; +}; -- cgit From fff75ee150104fdc29d86a75a5511482a981c27b Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Wed, 6 May 2015 12:25:33 -0500 Subject: ARM: dts: am4372: Add emif node Add node for TI AM4372 EMIF. Without this we get a warning with the recent commit fabbe6df (ARM: OMAP: AM43xx hwmod: Add data for am43xx emif hwmod). Signed-off-by: Dave Gerlach Tested-by: Felipe Balbi Acked-by: Felipe Balbi [tony@atomide.com: updated comments] Signed-off-by: Tony Lindgren --- Documentation/devicetree/bindings/memory-controllers/ti/emif.txt | 1 + arch/arm/boot/dts/am4372.dtsi | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt b/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt index 938f8e1ba205..0db60470ebb6 100644 --- a/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt +++ b/Documentation/devicetree/bindings/memory-controllers/ti/emif.txt @@ -8,6 +8,7 @@ of the EMIF IP and memory parts attached to it. Required properties: - compatible : Should be of the form "ti,emif-" where is the IP revision of the specific EMIF instance. + For am437x should be ti,emif-am4372. - phy-type : indicating the DDR phy type. Following are the allowed values diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi index c80a3e233792..9521a3827021 100644 --- a/arch/arm/boot/dts/am4372.dtsi +++ b/arch/arm/boot/dts/am4372.dtsi @@ -132,6 +132,12 @@ }; }; + emif: emif@4c000000 { + compatible = "ti,emif-am4372"; + reg = <0x4c000000 0x1000000>; + ti,hwmods = "emif"; + }; + edma: edma@49000000 { compatible = "ti,edma3"; ti,hwmods = "tpcc", "tptc0", "tptc1", "tptc2"; -- cgit From 9ab402aed38b95d9ce453108622be0fc6f167568 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 17 Jun 2015 17:52:43 +0300 Subject: ARM: dts: am57xx-beagle-x15: Provide supply for usb2_phy2 Without this USB2 breaks if USB1 is disabled or USB1 initializes after USB2 e.g. due to deferred probing. Fixes: 5a0f93c6576a ("ARM: dts: Add am57xx-beagle-x15") Signed-off-by: Roger Quadros Cc: stable@vger.kernel.org (v3.19+) Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am57xx-beagle-x15.dts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/boot/dts/am57xx-beagle-x15.dts b/arch/arm/boot/dts/am57xx-beagle-x15.dts index a42cc377a862..a63bf78191ea 100644 --- a/arch/arm/boot/dts/am57xx-beagle-x15.dts +++ b/arch/arm/boot/dts/am57xx-beagle-x15.dts @@ -605,6 +605,10 @@ phy-supply = <&ldousb_reg>; }; +&usb2_phy2 { + phy-supply = <&ldousb_reg>; +}; + &usb1 { dr_mode = "host"; pinctrl-names = "default"; -- cgit From 22a5dc10e3f8fb8370748ea19dc4e3e1620d8296 Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Tue, 30 Jun 2015 15:04:54 +0300 Subject: ARM: dts: am4372.dtsi: disable rfbi When DSS nodes were added to am4372.dtsi, the rfbi node was not marked as disabled. This should have been done, as the rule of thumb is to disable all DSS nodes that are not used, and especially rfbi, as we don't have a driver for rfbi. Signed-off-by: Tomi Valkeinen Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am4372.dtsi | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/boot/dts/am4372.dtsi b/arch/arm/boot/dts/am4372.dtsi index 9521a3827021..ade28c790f4b 100644 --- a/arch/arm/boot/dts/am4372.dtsi +++ b/arch/arm/boot/dts/am4372.dtsi @@ -947,6 +947,7 @@ ti,hwmods = "dss_rfbi"; clocks = <&disp_clk>; clock-names = "fck"; + status = "disabled"; }; }; -- cgit From d0f77d4d04b222a817925d33ba3589b190bfa863 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 2 Jul 2015 12:09:33 +0300 Subject: x86/init: Clear 'init_level4_pgt' earlier Currently x86_64_start_kernel() has two KASAN related function calls. The first call maps shadow to early_level4_pgt, the second maps shadow to init_level4_pgt. If we move clear_page(init_level4_pgt) earlier, we could hide KASAN low level detail from generic x86_64 initialization code. The next patch will do it. Signed-off-by: Andrey Ryabinin Cc: # 4.0+ Cc: Alexander Popov Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435828178-10975-2-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/head64.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 5a4668136e98..65c8985e3eb2 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -166,6 +166,8 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) /* clear bss before set_intr_gate with early_idt_handler */ clear_bss(); + clear_page(init_level4_pgt); + for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) set_intr_gate(i, early_idt_handler_array[i]); load_idt((const struct desc_ptr *)&idt_descr); @@ -177,7 +179,6 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) */ load_ucode_bsp(); - clear_page(init_level4_pgt); /* set init_level4_pgt kernel high mapping*/ init_level4_pgt[511] = early_level4_pgt[511]; -- cgit From 5d5aa3cfca5cf74cd928daf3674642e6004328d1 Mon Sep 17 00:00:00 2001 From: Alexander Popov Date: Thu, 2 Jul 2015 12:09:34 +0300 Subject: x86/kasan: Fix KASAN shadow region page tables Currently KASAN shadow region page tables created without respect of physical offset (phys_base). This causes kernel halt when phys_base is not zero. So let's initialize KASAN shadow region page tables in kasan_early_init() using __pa_nodebug() which considers phys_base. This patch also separates x86_64_start_kernel() from KASAN low level details by moving kasan_map_early_shadow(init_level4_pgt) into kasan_early_init(). Remove the comment before clear_bss() which stopped bringing much profit to the code readability. Otherwise describing all the new order dependencies would be too verbose. Signed-off-by: Alexander Popov Signed-off-by: Andrey Ryabinin Cc: # 4.0+ Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435828178-10975-3-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/kasan.h | 8 ++------ arch/x86/kernel/head64.c | 7 ++----- arch/x86/kernel/head_64.S | 29 ----------------------------- arch/x86/mm/kasan_init_64.c | 36 ++++++++++++++++++++++++++++++++++-- 4 files changed, 38 insertions(+), 42 deletions(-) diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h index 8b22422fbad8..74a2a8dc9908 100644 --- a/arch/x86/include/asm/kasan.h +++ b/arch/x86/include/asm/kasan.h @@ -14,15 +14,11 @@ #ifndef __ASSEMBLY__ -extern pte_t kasan_zero_pte[]; -extern pte_t kasan_zero_pmd[]; -extern pte_t kasan_zero_pud[]; - #ifdef CONFIG_KASAN -void __init kasan_map_early_shadow(pgd_t *pgd); +void __init kasan_early_init(void); void __init kasan_init(void); #else -static inline void kasan_map_early_shadow(pgd_t *pgd) { } +static inline void kasan_early_init(void) { } static inline void kasan_init(void) { } #endif diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 65c8985e3eb2..f129a9af6357 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -161,13 +161,12 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) /* Kill off the identity-map trampoline */ reset_early_page_tables(); - kasan_map_early_shadow(early_level4_pgt); - - /* clear bss before set_intr_gate with early_idt_handler */ clear_bss(); clear_page(init_level4_pgt); + kasan_early_init(); + for (i = 0; i < NUM_EXCEPTION_VECTORS; i++) set_intr_gate(i, early_idt_handler_array[i]); load_idt((const struct desc_ptr *)&idt_descr); @@ -182,8 +181,6 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data) /* set init_level4_pgt kernel high mapping*/ init_level4_pgt[511] = early_level4_pgt[511]; - kasan_map_early_shadow(init_level4_pgt); - x86_64_start_reservations(real_mode_data); } diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index e5c27f729a38..1d40ca8a73f2 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -516,38 +516,9 @@ ENTRY(phys_base) /* This must match the first entry in level2_kernel_pgt */ .quad 0x0000000000000000 -#ifdef CONFIG_KASAN -#define FILL(VAL, COUNT) \ - .rept (COUNT) ; \ - .quad (VAL) ; \ - .endr - -NEXT_PAGE(kasan_zero_pte) - FILL(kasan_zero_page - __START_KERNEL_map + _KERNPG_TABLE, 512) -NEXT_PAGE(kasan_zero_pmd) - FILL(kasan_zero_pte - __START_KERNEL_map + _KERNPG_TABLE, 512) -NEXT_PAGE(kasan_zero_pud) - FILL(kasan_zero_pmd - __START_KERNEL_map + _KERNPG_TABLE, 512) - -#undef FILL -#endif - - #include "../../x86/xen/xen-head.S" __PAGE_ALIGNED_BSS NEXT_PAGE(empty_zero_page) .skip PAGE_SIZE -#ifdef CONFIG_KASAN -/* - * This page used as early shadow. We don't use empty_zero_page - * at early stages, stack instrumentation could write some garbage - * to this page. - * Latter we reuse it as zero shadow for large ranges of memory - * that allowed to access, but not instrumented by kasan - * (vmalloc/vmemmap ...). - */ -NEXT_PAGE(kasan_zero_page) - .skip PAGE_SIZE -#endif diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 4860906c6b9f..0e4a05fa34d7 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -11,7 +11,19 @@ extern pgd_t early_level4_pgt[PTRS_PER_PGD]; extern struct range pfn_mapped[E820_X_MAX]; -extern unsigned char kasan_zero_page[PAGE_SIZE]; +static pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss; +static pmd_t kasan_zero_pmd[PTRS_PER_PMD] __page_aligned_bss; +static pte_t kasan_zero_pte[PTRS_PER_PTE] __page_aligned_bss; + +/* + * This page used as early shadow. We don't use empty_zero_page + * at early stages, stack instrumentation could write some garbage + * to this page. + * Latter we reuse it as zero shadow for large ranges of memory + * that allowed to access, but not instrumented by kasan + * (vmalloc/vmemmap ...). + */ +static unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss; static int __init map_range(struct range *range) { @@ -36,7 +48,7 @@ static void __init clear_pgds(unsigned long start, pgd_clear(pgd_offset_k(start)); } -void __init kasan_map_early_shadow(pgd_t *pgd) +static void __init kasan_map_early_shadow(pgd_t *pgd) { int i; unsigned long start = KASAN_SHADOW_START; @@ -166,6 +178,26 @@ static struct notifier_block kasan_die_notifier = { }; #endif +void __init kasan_early_init(void) +{ + int i; + pteval_t pte_val = __pa_nodebug(kasan_zero_page) | __PAGE_KERNEL; + pmdval_t pmd_val = __pa_nodebug(kasan_zero_pte) | _KERNPG_TABLE; + pudval_t pud_val = __pa_nodebug(kasan_zero_pmd) | _KERNPG_TABLE; + + for (i = 0; i < PTRS_PER_PTE; i++) + kasan_zero_pte[i] = __pte(pte_val); + + for (i = 0; i < PTRS_PER_PMD; i++) + kasan_zero_pmd[i] = __pmd(pmd_val); + + for (i = 0; i < PTRS_PER_PUD; i++) + kasan_zero_pud[i] = __pud(pud_val); + + kasan_map_early_shadow(early_level4_pgt); + kasan_map_early_shadow(init_level4_pgt); +} + void __init kasan_init(void) { int i; -- cgit From 241d2c54c62fa0939fc9a9512b48ac3434e90a89 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 2 Jul 2015 12:09:35 +0300 Subject: x86/kasan: Flush TLBs after switching CR3 load_cr3() doesn't cause tlb_flush if PGE enabled. This may cause tons of false positive reports spamming the kernel to death. To fix this __flush_tlb_all() should be called explicitly after CR3 changed. Signed-off-by: Andrey Ryabinin Cc: # 4.0+ Cc: Alexander Popov Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435828178-10975-4-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar --- arch/x86/mm/kasan_init_64.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 0e4a05fa34d7..5d26642e4e8a 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -208,6 +208,7 @@ void __init kasan_init(void) memcpy(early_level4_pgt, init_level4_pgt, sizeof(early_level4_pgt)); load_cr3(early_level4_pgt); + __flush_tlb_all(); clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END); @@ -234,5 +235,6 @@ void __init kasan_init(void) memset(kasan_zero_page, 0, PAGE_SIZE); load_cr3(init_level4_pgt); + __flush_tlb_all(); init_task.kasan_depth = 0; } -- cgit From d4f86beacc21d538dc41e1fc75a22e084f547edf Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 2 Jul 2015 12:09:36 +0300 Subject: x86/kasan: Fix boot crash on AMD processors While populating zero shadow wrong bits in upper level page tables used. __PAGE_KERNEL_RO that was used for pgd/pud/pmd has _PAGE_BIT_GLOBAL set. Global bit is present only in the lowest level of the page translation hierarchy (ptes), and it should be zero in upper levels. This bug seems doesn't cause any troubles on Intel cpus, while on AMDs it cause kernel crash on boot. Use _KERNPG_TABLE bits for pgds/puds/pmds to fix this. Reported-by: Borislav Petkov Signed-off-by: Andrey Ryabinin Cc: # 4.0+ Cc: Alexander Popov Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435828178-10975-5-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar --- arch/x86/mm/kasan_init_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 5d26642e4e8a..9a54dbe98064 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -85,7 +85,7 @@ static int __init zero_pmd_populate(pud_t *pud, unsigned long addr, while (IS_ALIGNED(addr, PMD_SIZE) && addr + PMD_SIZE <= end) { WARN_ON(!pmd_none(*pmd)); set_pmd(pmd, __pmd(__pa_nodebug(kasan_zero_pte) - | __PAGE_KERNEL_RO)); + | _KERNPG_TABLE)); addr += PMD_SIZE; pmd = pmd_offset(pud, addr); } @@ -111,7 +111,7 @@ static int __init zero_pud_populate(pgd_t *pgd, unsigned long addr, while (IS_ALIGNED(addr, PUD_SIZE) && addr + PUD_SIZE <= end) { WARN_ON(!pud_none(*pud)); set_pud(pud, __pud(__pa_nodebug(kasan_zero_pmd) - | __PAGE_KERNEL_RO)); + | _KERNPG_TABLE)); addr += PUD_SIZE; pud = pud_offset(pgd, addr); } @@ -136,7 +136,7 @@ static int __init zero_pgd_populate(unsigned long addr, unsigned long end) while (IS_ALIGNED(addr, PGDIR_SIZE) && addr + PGDIR_SIZE <= end) { WARN_ON(!pgd_none(*pgd)); set_pgd(pgd, __pgd(__pa_nodebug(kasan_zero_pud) - | __PAGE_KERNEL_RO)); + | _KERNPG_TABLE)); addr += PGDIR_SIZE; pgd = pgd_offset_k(addr); } -- cgit From 8515522949951d81fe2d06c0a3292f171f2b8ec4 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 2 Jul 2015 12:09:37 +0300 Subject: x86/kasan: Add message about KASAN being initialized Print informational message to tell user that kernel runs with KASAN enabled. Add a "kasan: " prefix to all messages in kasan_init_64.c. Signed-off-by: Andrey Ryabinin Cc: Alexander Popov Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435828178-10975-6-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar --- arch/x86/mm/kasan_init_64.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 9a54dbe98064..e1840f3db5b5 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -1,3 +1,4 @@ +#define pr_fmt(fmt) "kasan: " fmt #include #include #include @@ -237,4 +238,6 @@ void __init kasan_init(void) load_cr3(init_level4_pgt); __flush_tlb_all(); init_task.kasan_depth = 0; + + pr_info("Kernel address sanitizer initialized\n"); } -- cgit From d6f2d75a7ae06ffd793bb504c4f0d1665548cffc Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Thu, 2 Jul 2015 12:09:38 +0300 Subject: x86/kasan: Move KASAN_SHADOW_OFFSET to the arch Kconfig KASAN_SHADOW_OFFSET is purely arch specific setting, so it should be in arch's Kconfig file. Signed-off-by: Andrey Ryabinin Cc: Alexander Popov Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Paul Bolle Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1435828178-10975-7-git-send-email-a.ryabinin@samsung.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 5 +++++ lib/Kconfig.kasan | 4 ---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 55bced17dc95..0b2929f49531 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -254,6 +254,11 @@ config ARCH_SUPPORTS_OPTIMIZED_INLINING config ARCH_SUPPORTS_DEBUG_PAGEALLOC def_bool y +config KASAN_SHADOW_OFFSET + hex + depends on KASAN + default 0xdffffc0000000000 + config HAVE_INTEL_TXT def_bool y depends on INTEL_IOMMU && ACPI diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 777eda7d1ab4..39f24d6721e5 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -18,10 +18,6 @@ config KASAN For better error detection enable CONFIG_STACKTRACE, and add slub_debug=U to boot cmdline. -config KASAN_SHADOW_OFFSET - hex - default 0xdffffc0000000000 if X86_64 - choice prompt "Instrumentation type" depends on KASAN -- cgit From 1db875631f8d5bbf497f67e47f254eece888d51d Mon Sep 17 00:00:00 2001 From: Zhu Guihua Date: Fri, 3 Jul 2015 17:37:18 +0800 Subject: x86/espfix: Add 'cpu' parameter to init_espfix_ap() Add a CPU index parameter to init_espfix_ap(), so that the parameter could be propagated to the function for espfix page allocation. Signed-off-by: Zhu Guihua Cc: Cc: Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/cde3fcf1b3211f3f03feb1a995bce3fee850f0fc.1435824469.git.zhugh.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/espfix.h | 2 +- arch/x86/kernel/espfix_64.c | 7 +++---- arch/x86/kernel/smpboot.c | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/espfix.h b/arch/x86/include/asm/espfix.h index 99efebb2f69d..ca3ce9ab9385 100644 --- a/arch/x86/include/asm/espfix.h +++ b/arch/x86/include/asm/espfix.h @@ -9,7 +9,7 @@ DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_stack); DECLARE_PER_CPU_READ_MOSTLY(unsigned long, espfix_waddr); extern void init_espfix_bsp(void); -extern void init_espfix_ap(void); +extern void init_espfix_ap(int cpu); #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index f5d0730e7b08..1fb0e0003c94 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -131,12 +131,12 @@ void __init init_espfix_bsp(void) init_espfix_random(); /* The rest is the same as for any other processor */ - init_espfix_ap(); + init_espfix_ap(0); } -void init_espfix_ap(void) +void init_espfix_ap(int cpu) { - unsigned int cpu, page; + unsigned int page; unsigned long addr; pud_t pud, *pud_p; pmd_t pmd, *pmd_p; @@ -149,7 +149,6 @@ void init_espfix_ap(void) if (likely(this_cpu_read(espfix_stack))) return; /* Already initialized */ - cpu = smp_processor_id(); addr = espfix_base_addr(cpu); page = cpu/ESPFIX_STACKS_PER_PAGE; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 8add66b22f33..6f5abac6c28b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -242,7 +242,7 @@ static void notrace start_secondary(void *unused) * Enable the espfix hack for this CPU */ #ifdef CONFIG_X86_ESPFIX64 - init_espfix_ap(); + init_espfix_ap(smp_processor_id()); #endif /* -- cgit From 20d5e4a9cd453991e2590a4c25230a99b42dee0c Mon Sep 17 00:00:00 2001 From: Zhu Guihua Date: Fri, 3 Jul 2015 17:37:19 +0800 Subject: x86/espfix: Init espfix on the boot CPU side As we alloc pages with GFP_KERNEL in init_espfix_ap() which is called before we enable local irqs, so the lockdep sub-system would (correctly) trigger a warning about the potentially blocking API. So we allocate them on the boot CPU side when the secondary CPU is brought up by the boot CPU, and hand them over to the secondary CPU. And we use alloc_pages_node() with the secondary CPU's node, to make sure the espfix stack is NUMA-local to the CPU that is going to use it. Signed-off-by: Zhu Guihua Cc: Cc: Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/c97add2670e9abebb90095369f0cfc172373ac94.1435824469.git.zhugh.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/espfix_64.c | 21 +++++++++++++-------- arch/x86/kernel/smpboot.c | 14 +++++++------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/espfix_64.c b/arch/x86/kernel/espfix_64.c index 1fb0e0003c94..ce95676abd60 100644 --- a/arch/x86/kernel/espfix_64.c +++ b/arch/x86/kernel/espfix_64.c @@ -141,12 +141,12 @@ void init_espfix_ap(int cpu) pud_t pud, *pud_p; pmd_t pmd, *pmd_p; pte_t pte, *pte_p; - int n; + int n, node; void *stack_page; pteval_t ptemask; /* We only have to do this once... */ - if (likely(this_cpu_read(espfix_stack))) + if (likely(per_cpu(espfix_stack, cpu))) return; /* Already initialized */ addr = espfix_base_addr(cpu); @@ -164,12 +164,15 @@ void init_espfix_ap(int cpu) if (stack_page) goto unlock_done; + node = cpu_to_node(cpu); ptemask = __supported_pte_mask; pud_p = &espfix_pud_page[pud_index(addr)]; pud = *pud_p; if (!pud_present(pud)) { - pmd_p = (pmd_t *)__get_free_page(PGALLOC_GFP); + struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0); + + pmd_p = (pmd_t *)page_address(page); pud = __pud(__pa(pmd_p) | (PGTABLE_PROT & ptemask)); paravirt_alloc_pmd(&init_mm, __pa(pmd_p) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PUD_CLONES; n++) @@ -179,7 +182,9 @@ void init_espfix_ap(int cpu) pmd_p = pmd_offset(&pud, addr); pmd = *pmd_p; if (!pmd_present(pmd)) { - pte_p = (pte_t *)__get_free_page(PGALLOC_GFP); + struct page *page = alloc_pages_node(node, PGALLOC_GFP, 0); + + pte_p = (pte_t *)page_address(page); pmd = __pmd(__pa(pte_p) | (PGTABLE_PROT & ptemask)); paravirt_alloc_pte(&init_mm, __pa(pte_p) >> PAGE_SHIFT); for (n = 0; n < ESPFIX_PMD_CLONES; n++) @@ -187,7 +192,7 @@ void init_espfix_ap(int cpu) } pte_p = pte_offset_kernel(&pmd, addr); - stack_page = (void *)__get_free_page(GFP_KERNEL); + stack_page = page_address(alloc_pages_node(node, GFP_KERNEL, 0)); pte = __pte(__pa(stack_page) | (__PAGE_KERNEL_RO & ptemask)); for (n = 0; n < ESPFIX_PTE_CLONES; n++) set_pte(&pte_p[n*PTE_STRIDE], pte); @@ -198,7 +203,7 @@ void init_espfix_ap(int cpu) unlock_done: mutex_unlock(&espfix_init_mutex); done: - this_cpu_write(espfix_stack, addr); - this_cpu_write(espfix_waddr, (unsigned long)stack_page - + (addr & ~PAGE_MASK)); + per_cpu(espfix_stack, cpu) = addr; + per_cpu(espfix_waddr, cpu) = (unsigned long)stack_page + + (addr & ~PAGE_MASK); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6f5abac6c28b..0bd8c1d507b3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -238,13 +238,6 @@ static void notrace start_secondary(void *unused) */ check_tsc_sync_target(); - /* - * Enable the espfix hack for this CPU - */ -#ifdef CONFIG_X86_ESPFIX64 - init_espfix_ap(smp_processor_id()); -#endif - /* * We need to hold vector_lock so there the set of online cpus * does not change while we are assigning vectors to cpus. Holding @@ -854,6 +847,13 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) initial_code = (unsigned long)start_secondary; stack_start = idle->thread.sp; + /* + * Enable the espfix hack for this CPU + */ +#ifdef CONFIG_X86_ESPFIX64 + init_espfix_ap(cpu); +#endif + /* So we see what's up */ announce_cpu(cpu, apicid); -- cgit From 827a82ff399523a954253dfea401af748640f0f4 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 6 Jul 2015 10:14:34 -0400 Subject: x86/earlyprintk: Allow early_printk() to use console style parameters like '115200n8' When I enable early_printk on a kernel, I cut and paste the console= input and add to earlyprintk parameter. But I notice recently that ktest has not been detecting triple faults. The way it detects it, is by seeing the kernel banner "Linux version .." with a different kernel version pop up. Then I noticed that early printk was no longer working on my console, which was why ktest was not seeing it. I bisected it down and it was added to 4.0 with this commit: ea9e9d802902 ("Specify PCI based UART for earlyprintk") because it converted the simple_strtoul() that converts the baud number into a kstrtoul(). The problem with this is, I had as my baud rate, 115200n8 (acceptable for console=ttyS0), but because of the "n8", the kstrtoul() doesn't parse the baud rate and returns an error, which sets the baud rate to the default 9600. This explains the garbage on my screen. Now, earlyprintk= kernel parameter does not say it accepts that format. Thus, one answer would simply be me changing my kernel parameters to remove the "n8" since it isn't parsed anyway. But I wonder if other people run into this, and it seems strange that the two consoles for serial accepts different input. I could also extend this to have earlyprintk do something with that "n8" or whatever it has and have it match the console parsing (which, BTW, still uses simple_strtoul(), as I guess it has to). This patch just makes my old kernel parameter parsing work like it use to. Although, simple_strtoull() is considered obsolete, it is the only standard string parsing function that parses a number that is attached to text. Ironically, commit ea9e9d802902 also added several calls to simple_strtoul()! Signed-off-by: Steven Rostedt Cc: Alan Cox Cc: Andy Lutomirski Cc: Andy Shevchenko Cc: Borislav Petkov Cc: Brian Gerst Cc: David Cohen Cc: Denys Vlasenko Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stuart R. Anderson Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20150706101434.5f6a351b@gandalf.local.home [ Cleaned it up a bit. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/early_printk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 89427d8d4fc5..eec40f595ab9 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -175,7 +175,9 @@ static __init void early_serial_init(char *s) } if (*s) { - if (kstrtoul(s, 0, &baud) < 0 || baud == 0) + baud = simple_strtoull(s, &e, 0); + + if (baud == 0 || s == e) baud = DEFAULT_BAUD; } -- cgit From 4b59246d9aeaac71f5f7a29459cd2abe32c527e6 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 6 Jul 2015 17:08:09 +0200 Subject: arm64: remove another unnecessary libfdt include path Patch 63a4aea55670 ("of: clean-up unnecessary libfdt include paths") removed all explicit libfdt include paths, since those are no longer necessary after the latest dtc upgrade. However, this one snuck in during the same merge window. Remove it. Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/mm/Makefile | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/mm/Makefile b/arch/arm64/mm/Makefile index 9d84feb41a16..773d37a14039 100644 --- a/arch/arm64/mm/Makefile +++ b/arch/arm64/mm/Makefile @@ -4,5 +4,3 @@ obj-y := dma-mapping.o extable.o fault.o init.o \ context.o proc.o pageattr.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_ARM64_PTDUMP) += dump.o - -CFLAGS_mmu.o := -I$(srctree)/scripts/dtc/libfdt/ -- cgit From 3446af31b721d839c69f8fc70b8a434df6176a64 Mon Sep 17 00:00:00 2001 From: Suneel Garapati Date: Mon, 29 Jun 2015 07:02:08 +0200 Subject: arm64: defconfig: Add Ceva ahci to the defconfig The Ceva ahci controller is available on the Xilinx Zynq UltraScale+ MPSoC. Signed-off-by: Suneel Garapati Signed-off-by: Michal Simek [catalin.marinas@arm.com: removed unnecessary defconfig changes] Signed-off-by: Catalin Marinas --- arch/arm64/configs/defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index f38c94f1d898..4e17e7ede33d 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -83,6 +83,7 @@ CONFIG_BLK_DEV_SD=y CONFIG_ATA=y CONFIG_SATA_AHCI=y CONFIG_SATA_AHCI_PLATFORM=y +CONFIG_AHCI_CEVA=y CONFIG_AHCI_XGENE=y CONFIG_PATA_PLATFORM=y CONFIG_PATA_OF_PLATFORM=y -- cgit From 6d451367bfa16fc103604bacd258f534c65d1540 Mon Sep 17 00:00:00 2001 From: Hai Li Date: Thu, 25 Jun 2015 18:35:33 -0400 Subject: clk: qcom: Use parent rate when set rate to pixel RCG clock Since the parent rate has been recalculated, pixel RCG clock should rely on it to find the correct M/N values during set_rate, instead of calling __clk_round_rate() to its parent again. Signed-off-by: Hai Li Tested-by: Archit Taneja Fixes: 99cbd064b059 ("clk: qcom: Support display RCG clocks") [sboyd@codeaurora.org: Silenced unused parent variable warning] Signed-off-by: Stephen Boyd --- drivers/clk/qcom/clk-rcg2.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c index b95d17fbb8d7..92936f0912d2 100644 --- a/drivers/clk/qcom/clk-rcg2.c +++ b/drivers/clk/qcom/clk-rcg2.c @@ -530,19 +530,16 @@ static int clk_pixel_set_rate(struct clk_hw *hw, unsigned long rate, struct clk_rcg2 *rcg = to_clk_rcg2(hw); struct freq_tbl f = *rcg->freq_tbl; const struct frac_entry *frac = frac_table_pixel; - unsigned long request, src_rate; + unsigned long request; int delta = 100000; u32 mask = BIT(rcg->hid_width) - 1; u32 hid_div; - int index = qcom_find_src_index(hw, rcg->parent_map, f.src); - struct clk *parent = clk_get_parent_by_index(hw->clk, index); for (; frac->num; frac++) { request = (rate * frac->den) / frac->num; - src_rate = __clk_round_rate(parent, request); - if ((src_rate < (request - delta)) || - (src_rate > (request + delta))) + if ((parent_rate < (request - delta)) || + (parent_rate > (request + delta))) continue; regmap_read(rcg->clkr.regmap, rcg->cmd_rcgr + CFG_REG, -- cgit From c14bada8f71eef63d4465aa8e3a479104a06e7c7 Mon Sep 17 00:00:00 2001 From: Gabriel Fernandez Date: Tue, 23 Jun 2015 16:09:21 +0200 Subject: drivers: clk: st: Remove unused code Remove this duplicated code due to a bad copy / paste. Signed-off-by: Gabriel Fernandez Signed-off-by: Stephen Boyd --- drivers/clk/st/clkgen-fsyn.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/clk/st/clkgen-fsyn.c b/drivers/clk/st/clkgen-fsyn.c index e94197f04b0b..099ed6050d25 100644 --- a/drivers/clk/st/clkgen-fsyn.c +++ b/drivers/clk/st/clkgen-fsyn.c @@ -1082,10 +1082,6 @@ static const struct of_device_id quadfs_of_match[] = { .compatible = "st,stih407-quadfs660-D", .data = &st_fs660c32_D_407 }, - { - .compatible = "st,stih407-quadfs660-D", - .data = (void *)&st_fs660c32_D_407 - }, {} }; -- cgit From c4d339c69f91c20def1366c1db8af6ac7a3013a9 Mon Sep 17 00:00:00 2001 From: Gabriel Fernandez Date: Tue, 23 Jun 2015 16:09:22 +0200 Subject: drivers: clk: st: Fix FSYN channel values This patch fixes the value for disabling the FSYN channel clock. The 'is_enabled' returned value is also fixed. Signed-off-by: Pankaj Dev Signed-off-by: Gabriel Fernandez Signed-off-by: Stephen Boyd --- drivers/clk/st/clkgen-fsyn.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/clk/st/clkgen-fsyn.c b/drivers/clk/st/clkgen-fsyn.c index 099ed6050d25..95851c464478 100644 --- a/drivers/clk/st/clkgen-fsyn.c +++ b/drivers/clk/st/clkgen-fsyn.c @@ -489,7 +489,7 @@ static int quadfs_pll_is_enabled(struct clk_hw *hw) struct st_clk_quadfs_pll *pll = to_quadfs_pll(hw); u32 npda = CLKGEN_READ(pll, npda); - return !!npda; + return pll->data->powerup_polarity ? !npda : !!npda; } static int clk_fs660c32_vco_get_rate(unsigned long input, struct stm_fs *fs, @@ -774,7 +774,7 @@ static void quadfs_fsynth_disable(struct clk_hw *hw) if (fs->lock) spin_lock_irqsave(fs->lock, flags); - CLKGEN_WRITE(fs, nsb[fs->chan], !fs->data->standby_polarity); + CLKGEN_WRITE(fs, nsb[fs->chan], fs->data->standby_polarity); if (fs->lock) spin_unlock_irqrestore(fs->lock, flags); -- cgit From 0f4f2afd4402883a51ad27a1d9e046643bb1e3cb Mon Sep 17 00:00:00 2001 From: Giuseppe Cavallaro Date: Tue, 23 Jun 2015 16:09:23 +0200 Subject: drivers: clk: st: Fix flexgen lock init While proving lock, the following warning happens and it is fixed after initializing lock in the setup function INFO: trying to register non-static key. the code is fine but needs lockdep annotation. turning off the locking correctness validator. CPU: 0 PID: 0 Comm: swapper/0 Not tainted 3.10.27-02861-g39df285-dirty #33 [] (unwind_backtrace+0x0/0xf4) from [] (show_stack+0x10/0x14) [] (show_stack+0x10/0x14) from [] (__lock_acquire+0x900/0xb14) [] (__lock_acquire+0x900/0xb14) from [] (lock_acquire+0x68/0x7c) [] (lock_acquire+0x68/0x7c) from [] (_raw_spin_lock_irqsave+0x48/0x5c) [] (_raw_spin_lock_irqsave+0x48/0x5c) from [] (clk_gate_endisable+0x28/0x88) [] (clk_gate_endisable+0x28/0x88) from [] (clk_gate_enable+0xc/0x14) [] (clk_gate_enable+0xc/0x14) from [] (flexgen_enable+0x28/0x40) [] (flexgen_enable+0x28/0x40) from [] (__clk_enable+0x5c/0x9c) [] (__clk_enable+0x5c/0x9c) from [] (clk_enable+0x18/0x2c) [] (clk_enable+0x18/0x2c) from [] (st_lpc_of_register+0xc0/0x248) [] (st_lpc_of_register+0xc0/0x248) from [] (clocksource_of_init+0x34/0x58) [] (clocksource_of_init+0x34/0x58) from [] (sti_timer_init+0x10/0x18) [] (sti_timer_init+0x10/0x18) from [] (time_init+0x20/0x30) [] (time_init+0x20/0x30) from [] (start_kernel+0x20c/0x2e8) [] (start_kernel+0x20c/0x2e8) from [<40008074>] (0x40008074) Signed-off-by: Giuseppe Cavallaro Signed-off-by: Gabriel Fernandez Fixes: b116517055b7 ("clk: st: STiH407: Support for Flexgen Clocks") Signed-off-by: Stephen Boyd --- drivers/clk/st/clk-flexgen.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/clk/st/clk-flexgen.c b/drivers/clk/st/clk-flexgen.c index 657ca14ba709..be06d2a79dce 100644 --- a/drivers/clk/st/clk-flexgen.c +++ b/drivers/clk/st/clk-flexgen.c @@ -303,6 +303,8 @@ static void __init st_of_flexgen_setup(struct device_node *np) if (!rlock) goto err; + spin_lock_init(rlock); + for (i = 0; i < clk_data->clk_num; i++) { struct clk *clk; const char *clk_name; -- cgit From 18fee4538fe534c53fa95fe9eaa7f96586814e0a Mon Sep 17 00:00:00 2001 From: Pankaj Dev Date: Tue, 23 Jun 2015 16:09:24 +0200 Subject: drivers: clk: st: Add CLK_GET_RATE_NOCACHE flag to clocks Add the CLK_GET_RATE_NOCACHE flag to all the clocks with recalc ops, so that they reflect Hw rate after CPS wake-up when a clk_get_rate() is called Signed-off-by: Pankaj Dev Signed-off-by: Gabriel Fernandez Signed-off-by: Stephen Boyd --- drivers/clk/st/clk-flexgen.c | 2 +- drivers/clk/st/clkgen-fsyn.c | 2 +- drivers/clk/st/clkgen-mux.c | 8 +++++--- drivers/clk/st/clkgen-pll.c | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/clk/st/clk-flexgen.c b/drivers/clk/st/clk-flexgen.c index be06d2a79dce..8dd8cce27361 100644 --- a/drivers/clk/st/clk-flexgen.c +++ b/drivers/clk/st/clk-flexgen.c @@ -190,7 +190,7 @@ static struct clk *clk_register_flexgen(const char *name, init.name = name; init.ops = &flexgen_ops; - init.flags = CLK_IS_BASIC | flexgen_flags; + init.flags = CLK_IS_BASIC | CLK_GET_RATE_NOCACHE | flexgen_flags; init.parent_names = parent_names; init.num_parents = num_parents; diff --git a/drivers/clk/st/clkgen-fsyn.c b/drivers/clk/st/clkgen-fsyn.c index 95851c464478..99c98c15d4a4 100644 --- a/drivers/clk/st/clkgen-fsyn.c +++ b/drivers/clk/st/clkgen-fsyn.c @@ -635,7 +635,7 @@ static struct clk * __init st_clk_register_quadfs_pll( init.name = name; init.ops = quadfs->pll_ops; - init.flags = CLK_IS_BASIC; + init.flags = CLK_IS_BASIC | CLK_GET_RATE_NOCACHE; init.parent_names = &parent_name; init.num_parents = 1; diff --git a/drivers/clk/st/clkgen-mux.c b/drivers/clk/st/clkgen-mux.c index 4fbe6e099587..bd30f8d4e902 100644 --- a/drivers/clk/st/clkgen-mux.c +++ b/drivers/clk/st/clkgen-mux.c @@ -237,7 +237,7 @@ static struct clk *clk_register_genamux(const char *name, init.name = name; init.ops = &clkgena_divmux_ops; - init.flags = CLK_IS_BASIC; + init.flags = CLK_IS_BASIC | CLK_GET_RATE_NOCACHE; init.parent_names = parent_names; init.num_parents = num_parents; @@ -513,7 +513,8 @@ static void __init st_of_clkgena_prediv_setup(struct device_node *np) 0, &clk_name)) return; - clk = clk_register_divider_table(NULL, clk_name, parent_name, 0, + clk = clk_register_divider_table(NULL, clk_name, parent_name, + CLK_GET_RATE_NOCACHE, reg + data->offset, data->shift, 1, 0, data->table, NULL); if (IS_ERR(clk)) @@ -786,7 +787,8 @@ static void __init st_of_clkgen_vcc_setup(struct device_node *np) &mux->hw, &clk_mux_ops, &div->hw, &clk_divider_ops, &gate->hw, &clk_gate_ops, - data->clk_flags); + data->clk_flags | + CLK_GET_RATE_NOCACHE); if (IS_ERR(clk)) { kfree(gate); kfree(div); diff --git a/drivers/clk/st/clkgen-pll.c b/drivers/clk/st/clkgen-pll.c index 106532207213..72d1c27eaffa 100644 --- a/drivers/clk/st/clkgen-pll.c +++ b/drivers/clk/st/clkgen-pll.c @@ -406,7 +406,7 @@ static struct clk * __init clkgen_pll_register(const char *parent_name, init.name = clk_name; init.ops = pll_data->ops; - init.flags = CLK_IS_BASIC; + init.flags = CLK_IS_BASIC | CLK_GET_RATE_NOCACHE; init.parent_names = &parent_name; init.num_parents = 1; -- cgit From 3be6d8ce639d92e60d144fb99dd74a53fe3799bb Mon Sep 17 00:00:00 2001 From: Gabriel Fernandez Date: Tue, 23 Jun 2015 16:09:25 +0200 Subject: drivers: clk: st: Fix mux bit-setting for Cortex A9 clocks This patch fixes the mux bit-setting for ClockgenA9. Signed-off-by: Gabriel Fernandez Fixes: 13e6f2da1ddf ("clk: st: STiH407: Support for A9 MUX Clocks") Signed-off-by: Stephen Boyd --- drivers/clk/st/clkgen-mux.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/st/clkgen-mux.c b/drivers/clk/st/clkgen-mux.c index bd30f8d4e902..717c4a91a17b 100644 --- a/drivers/clk/st/clkgen-mux.c +++ b/drivers/clk/st/clkgen-mux.c @@ -583,7 +583,7 @@ static struct clkgen_mux_data stih416_a9_mux_data = { }; static struct clkgen_mux_data stih407_a9_mux_data = { .offset = 0x1a4, - .shift = 1, + .shift = 0, .width = 2, }; -- cgit From 0294112ee3135fbd15eaa70015af8283642dd970 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 4 Jul 2015 03:09:03 +0200 Subject: ACPI / PNP: Reserve ACPI resources at the fs_initcall_sync stage This effectively reverts the following three commits: 7bc10388ccdd ACPI / resources: free memory on error in add_region_before() 0f1b414d1907 ACPI / PNP: Avoid conflicting resource reservations b9a5e5e18fbf ACPI / init: Fix the ordering of acpi_reserve_resources() (commit b9a5e5e18fbf introduced regressions some of which, but not all, were addressed by commit 0f1b414d1907 and commit 7bc10388ccdd was a fixup on top of the latter) and causes ACPI fixed hardware resources to be reserved at the fs_initcall_sync stage of system initialization. The story is as follows. First, a boot regression was reported due to an apparent resource reservation ordering change after a commit that shouldn't lead to such changes. Investigation led to the conclusion that the problem happened because acpi_reserve_resources() was executed at the device_initcall() stage of system initialization which wasn't strictly ordered with respect to driver initialization (and with respect to the initialization of the pcieport driver in particular), so a random change causing the device initcalls to be run in a different order might break things. The response to that was to attempt to run acpi_reserve_resources() as soon as we knew that ACPI would be in use (commit b9a5e5e18fbf). However, that turned out to be too early, because it caused resource reservations made by the PNP system driver to fail on at least one system and that failure was addressed by commit 0f1b414d1907. That fix still turned out to be insufficient, though, because calling acpi_reserve_resources() before the fs_initcall stage of system initialization caused a boot regression to happen on the eCAFE EC-800-H20G/S netbook. That meant that we only could call acpi_reserve_resources() at the fs_initcall initialization stage or later, but then we might just as well call it after the PNP initalization in which case commit 0f1b414d1907 wouldn't be necessary any more. For this reason, the changes made by commit 0f1b414d1907 are reverted (along with a memory leak fixup on top of that commit), the changes made by commit b9a5e5e18fbf that went too far are reverted too and acpi_reserve_resources() is changed into fs_initcall_sync, which will cause it to be executed after the PNP subsystem initialization (which is an fs_initcall) and before device initcalls (including the pcieport driver initialization) which should avoid the initial issue. Link: https://bugzilla.kernel.org/show_bug.cgi?id=100581 Link: http://marc.info/?t=143092384600002&r=1&w=2 Link: https://bugzilla.kernel.org/show_bug.cgi?id=99831 Link: http://marc.info/?t=143389402600001&r=1&w=2 Fixes: b9a5e5e18fbf "ACPI / init: Fix the ordering of acpi_reserve_resources()" Reported-by: Roland Dreier Cc: All applicable Signed-off-by: Rafael J. Wysocki --- drivers/acpi/osl.c | 12 +++- drivers/acpi/resource.c | 162 ------------------------------------------------ drivers/pnp/system.c | 35 +++-------- include/linux/acpi.h | 10 --- 4 files changed, 18 insertions(+), 201 deletions(-) diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index c262e4acd68d..3b8963f21b36 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -175,10 +175,14 @@ static void __init acpi_request_region (struct acpi_generic_address *gas, if (!addr || !length) return; - acpi_reserve_region(addr, length, gas->space_id, 0, desc); + /* Resources are never freed */ + if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_IO) + request_region(addr, length, desc); + else if (gas->space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) + request_mem_region(addr, length, desc); } -static void __init acpi_reserve_resources(void) +static int __init acpi_reserve_resources(void) { acpi_request_region(&acpi_gbl_FADT.xpm1a_event_block, acpi_gbl_FADT.pm1_event_length, "ACPI PM1a_EVT_BLK"); @@ -207,7 +211,10 @@ static void __init acpi_reserve_resources(void) if (!(acpi_gbl_FADT.gpe1_block_length & 0x1)) acpi_request_region(&acpi_gbl_FADT.xgpe1_block, acpi_gbl_FADT.gpe1_block_length, "ACPI GPE1_BLK"); + + return 0; } +fs_initcall_sync(acpi_reserve_resources); void acpi_os_printf(const char *fmt, ...) { @@ -1862,7 +1869,6 @@ acpi_status __init acpi_os_initialize(void) acpi_status __init acpi_os_initialize1(void) { - acpi_reserve_resources(); kacpid_wq = alloc_workqueue("kacpid", 0, 1); kacpi_notify_wq = alloc_workqueue("kacpi_notify", 0, 1); kacpi_hotplug_wq = alloc_ordered_workqueue("kacpi_hotplug", 0); diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c index 10561ce16ed1..8244f013f210 100644 --- a/drivers/acpi/resource.c +++ b/drivers/acpi/resource.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #ifdef CONFIG_X86 @@ -622,164 +621,3 @@ int acpi_dev_filter_resource_type(struct acpi_resource *ares, return (type & types) ? 0 : 1; } EXPORT_SYMBOL_GPL(acpi_dev_filter_resource_type); - -struct reserved_region { - struct list_head node; - u64 start; - u64 end; -}; - -static LIST_HEAD(reserved_io_regions); -static LIST_HEAD(reserved_mem_regions); - -static int request_range(u64 start, u64 end, u8 space_id, unsigned long flags, - char *desc) -{ - unsigned int length = end - start + 1; - struct resource *res; - - res = space_id == ACPI_ADR_SPACE_SYSTEM_IO ? - request_region(start, length, desc) : - request_mem_region(start, length, desc); - if (!res) - return -EIO; - - res->flags &= ~flags; - return 0; -} - -static int add_region_before(u64 start, u64 end, u8 space_id, - unsigned long flags, char *desc, - struct list_head *head) -{ - struct reserved_region *reg; - int error; - - reg = kmalloc(sizeof(*reg), GFP_KERNEL); - if (!reg) - return -ENOMEM; - - error = request_range(start, end, space_id, flags, desc); - if (error) { - kfree(reg); - return error; - } - - reg->start = start; - reg->end = end; - list_add_tail(®->node, head); - return 0; -} - -/** - * acpi_reserve_region - Reserve an I/O or memory region as a system resource. - * @start: Starting address of the region. - * @length: Length of the region. - * @space_id: Identifier of address space to reserve the region from. - * @flags: Resource flags to clear for the region after requesting it. - * @desc: Region description (for messages). - * - * Reserve an I/O or memory region as a system resource to prevent others from - * using it. If the new region overlaps with one of the regions (in the given - * address space) already reserved by this routine, only the non-overlapping - * parts of it will be reserved. - * - * Returned is either 0 (success) or a negative error code indicating a resource - * reservation problem. It is the code of the first encountered error, but the - * routine doesn't abort until it has attempted to request all of the parts of - * the new region that don't overlap with other regions reserved previously. - * - * The resources requested by this routine are never released. - */ -int acpi_reserve_region(u64 start, unsigned int length, u8 space_id, - unsigned long flags, char *desc) -{ - struct list_head *regions; - struct reserved_region *reg; - u64 end = start + length - 1; - int ret = 0, error = 0; - - if (space_id == ACPI_ADR_SPACE_SYSTEM_IO) - regions = &reserved_io_regions; - else if (space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY) - regions = &reserved_mem_regions; - else - return -EINVAL; - - if (list_empty(regions)) - return add_region_before(start, end, space_id, flags, desc, regions); - - list_for_each_entry(reg, regions, node) - if (reg->start == end + 1) { - /* The new region can be prepended to this one. */ - ret = request_range(start, end, space_id, flags, desc); - if (!ret) - reg->start = start; - - return ret; - } else if (reg->start > end) { - /* No overlap. Add the new region here and get out. */ - return add_region_before(start, end, space_id, flags, - desc, ®->node); - } else if (reg->end == start - 1) { - goto combine; - } else if (reg->end >= start) { - goto overlap; - } - - /* The new region goes after the last existing one. */ - return add_region_before(start, end, space_id, flags, desc, regions); - - overlap: - /* - * The new region overlaps an existing one. - * - * The head part of the new region immediately preceding the existing - * overlapping one can be combined with it right away. - */ - if (reg->start > start) { - error = request_range(start, reg->start - 1, space_id, flags, desc); - if (error) - ret = error; - else - reg->start = start; - } - - combine: - /* - * The new region is adjacent to an existing one. If it extends beyond - * that region all the way to the next one, it is possible to combine - * all three of them. - */ - while (reg->end < end) { - struct reserved_region *next = NULL; - u64 a = reg->end + 1, b = end; - - if (!list_is_last(®->node, regions)) { - next = list_next_entry(reg, node); - if (next->start <= end) - b = next->start - 1; - } - error = request_range(a, b, space_id, flags, desc); - if (!error) { - if (next && next->start == b + 1) { - reg->end = next->end; - list_del(&next->node); - kfree(next); - } else { - reg->end = end; - break; - } - } else if (next) { - if (!ret) - ret = error; - - reg = next; - } else { - break; - } - } - - return ret ? ret : error; -} -EXPORT_SYMBOL_GPL(acpi_reserve_region); diff --git a/drivers/pnp/system.c b/drivers/pnp/system.c index 515f33882ab8..49c1720df59a 100644 --- a/drivers/pnp/system.c +++ b/drivers/pnp/system.c @@ -7,7 +7,6 @@ * Bjorn Helgaas */ -#include #include #include #include @@ -23,41 +22,25 @@ static const struct pnp_device_id pnp_dev_table[] = { {"", 0} }; -#ifdef CONFIG_ACPI -static bool __reserve_range(u64 start, unsigned int length, bool io, char *desc) -{ - u8 space_id = io ? ACPI_ADR_SPACE_SYSTEM_IO : ACPI_ADR_SPACE_SYSTEM_MEMORY; - return !acpi_reserve_region(start, length, space_id, IORESOURCE_BUSY, desc); -} -#else -static bool __reserve_range(u64 start, unsigned int length, bool io, char *desc) -{ - struct resource *res; - - res = io ? request_region(start, length, desc) : - request_mem_region(start, length, desc); - if (res) { - res->flags &= ~IORESOURCE_BUSY; - return true; - } - return false; -} -#endif - static void reserve_range(struct pnp_dev *dev, struct resource *r, int port) { char *regionid; const char *pnpid = dev_name(&dev->dev); resource_size_t start = r->start, end = r->end; - bool reserved; + struct resource *res; regionid = kmalloc(16, GFP_KERNEL); if (!regionid) return; snprintf(regionid, 16, "pnp %s", pnpid); - reserved = __reserve_range(start, end - start + 1, !!port, regionid); - if (!reserved) + if (port) + res = request_region(start, end - start + 1, regionid); + else + res = request_mem_region(start, end - start + 1, regionid); + if (res) + res->flags &= ~IORESOURCE_BUSY; + else kfree(regionid); /* @@ -66,7 +49,7 @@ static void reserve_range(struct pnp_dev *dev, struct resource *r, int port) * have double reservations. */ dev_info(&dev->dev, "%pR %s reserved\n", r, - reserved ? "has been" : "could not be"); + res ? "has been" : "could not be"); } static void reserve_resources_of_dev(struct pnp_dev *dev) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c471dfc93b71..fedfd1bea3bf 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -309,9 +309,6 @@ int acpi_check_region(resource_size_t start, resource_size_t n, int acpi_resources_are_enforced(void); -int acpi_reserve_region(u64 start, unsigned int length, u8 space_id, - unsigned long flags, char *desc); - #ifdef CONFIG_HIBERNATION void __init acpi_no_s4_hw_signature(void); #endif @@ -507,13 +504,6 @@ static inline int acpi_check_region(resource_size_t start, resource_size_t n, return 0; } -static inline int acpi_reserve_region(u64 start, unsigned int length, - u8 space_id, unsigned long flags, - char *desc) -{ - return -ENXIO; -} - struct acpi_table_header; static inline int acpi_table_parse(char *id, int (*handler)(struct acpi_table_header *)) -- cgit From d3e13ff3c1aa2403d9a5f371baac088daeb8f56d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 7 Jul 2015 00:31:47 +0200 Subject: ACPI / LPSS: Fix up acpi_lpss_create_device() Fix a return value (which should be a negative error code) and a memory leak (the list allocated by acpi_dev_get_resources() needs to be freed on ioremap() errors too) in acpi_lpss_create_device() introduced by commit 4483d59e29fe 'ACPI / LPSS: check the result of ioremap()'. Fixes: 4483d59e29fe 'ACPI / LPSS: check the result of ioremap()' Reported-by: Dan Carpenter Cc: 4.0+ # 4.0+ Signed-off-by: Rafael J. Wysocki --- drivers/acpi/acpi_lpss.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 569ee090343f..46b58abb08c5 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -352,13 +352,16 @@ static int acpi_lpss_create_device(struct acpi_device *adev, pdata->mmio_size = resource_size(rentry->res); pdata->mmio_base = ioremap(rentry->res->start, pdata->mmio_size); - if (!pdata->mmio_base) - goto err_out; break; } acpi_dev_free_resource_list(&resource_list); + if (!pdata->mmio_base) { + ret = -ENOMEM; + goto err_out; + } + pdata->dev_desc = dev_desc; if (dev_desc->setup) -- cgit From 7b2a4635b84b4dbb07c93201a8c0aea82ed65e4f Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 30 Jun 2015 10:58:44 +0800 Subject: clk: mediatek: mt8173: Fix enabling of critical clocks On the MT8173 the clocks are provided by different units. To enable the critical clocks we must be sure that all parent clocks are already registered, otherwise the parents of the critical clocks end up being unused and get disabled later. To find a place where all parents are registered we try each time after we've registered some clocks if all known providers are present now and only then we enable the critical clocks Signed-off-by: Sascha Hauer Signed-off-by: James Liao [sboyd@codeaurora.org: Marked function and data __init] Signed-off-by: Stephen Boyd --- drivers/clk/mediatek/clk-mt8173.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/drivers/clk/mediatek/clk-mt8173.c b/drivers/clk/mediatek/clk-mt8173.c index 4b9e04cdf7e8..8b6523d15fb8 100644 --- a/drivers/clk/mediatek/clk-mt8173.c +++ b/drivers/clk/mediatek/clk-mt8173.c @@ -700,6 +700,22 @@ static const struct mtk_composite peri_clks[] __initconst = { MUX(CLK_PERI_UART3_SEL, "uart3_ck_sel", uart_ck_sel_parents, 0x40c, 3, 1), }; +static struct clk_onecell_data *mt8173_top_clk_data __initdata; +static struct clk_onecell_data *mt8173_pll_clk_data __initdata; + +static void __init mtk_clk_enable_critical(void) +{ + if (!mt8173_top_clk_data || !mt8173_pll_clk_data) + return; + + clk_prepare_enable(mt8173_pll_clk_data->clks[CLK_APMIXED_ARMCA15PLL]); + clk_prepare_enable(mt8173_pll_clk_data->clks[CLK_APMIXED_ARMCA7PLL]); + clk_prepare_enable(mt8173_top_clk_data->clks[CLK_TOP_MEM_SEL]); + clk_prepare_enable(mt8173_top_clk_data->clks[CLK_TOP_DDRPHYCFG_SEL]); + clk_prepare_enable(mt8173_top_clk_data->clks[CLK_TOP_CCI400_SEL]); + clk_prepare_enable(mt8173_top_clk_data->clks[CLK_TOP_RTC_SEL]); +} + static void __init mtk_topckgen_init(struct device_node *node) { struct clk_onecell_data *clk_data; @@ -712,19 +728,19 @@ static void __init mtk_topckgen_init(struct device_node *node) return; } - clk_data = mtk_alloc_clk_data(CLK_TOP_NR_CLK); + mt8173_top_clk_data = clk_data = mtk_alloc_clk_data(CLK_TOP_NR_CLK); mtk_clk_register_factors(root_clk_alias, ARRAY_SIZE(root_clk_alias), clk_data); mtk_clk_register_factors(top_divs, ARRAY_SIZE(top_divs), clk_data); mtk_clk_register_composites(top_muxes, ARRAY_SIZE(top_muxes), base, &mt8173_clk_lock, clk_data); - clk_prepare_enable(clk_data->clks[CLK_TOP_CCI400_SEL]); - r = of_clk_add_provider(node, of_clk_src_onecell_get, clk_data); if (r) pr_err("%s(): could not register clock provider: %d\n", __func__, r); + + mtk_clk_enable_critical(); } CLK_OF_DECLARE(mtk_topckgen, "mediatek,mt8173-topckgen", mtk_topckgen_init); @@ -818,13 +834,13 @@ static void __init mtk_apmixedsys_init(struct device_node *node) { struct clk_onecell_data *clk_data; - clk_data = mtk_alloc_clk_data(CLK_APMIXED_NR_CLK); + mt8173_pll_clk_data = clk_data = mtk_alloc_clk_data(CLK_APMIXED_NR_CLK); if (!clk_data) return; mtk_clk_register_plls(node, plls, ARRAY_SIZE(plls), clk_data); - clk_prepare_enable(clk_data->clks[CLK_APMIXED_ARMCA15PLL]); + mtk_clk_enable_critical(); } CLK_OF_DECLARE(mtk_apmixedsys, "mediatek,mt8173-apmixedsys", mtk_apmixedsys_init); -- cgit From 93af5e93544328285a6f65f7d47bbea8979b28fb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 26 Jun 2015 11:14:14 +0200 Subject: PM / Domains: Avoid infinite loops in attach/detach code If pm_genpd_{add,remove}_device() keeps on failing with -EAGAIN, we end up with an infinite loop in genpd_dev_pm_{at,de}tach(). This may happen due to a genpd.prepared_count imbalance. This is a bug elsewhere, but it will result in a system lock up, possibly during reboot of an otherwise functioning system. To avoid this, put a limit on the maximum number of loop iterations, using an exponential back-off mechanism. If the limit is reached, the operation will just fail. An error message is already printed. Signed-off-by: Geert Uytterhoeven Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index cdd547bd67df..0ee43c1056e0 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -6,6 +6,7 @@ * This file is released under the GPLv2. */ +#include #include #include #include @@ -19,6 +20,8 @@ #include #include +#define GENPD_RETRY_MAX_MS 250 /* Approximate */ + #define GENPD_DEV_CALLBACK(genpd, type, callback, dev) \ ({ \ type (*__routine)(struct device *__d); \ @@ -2131,6 +2134,7 @@ EXPORT_SYMBOL_GPL(of_genpd_get_from_provider); static void genpd_dev_pm_detach(struct device *dev, bool power_off) { struct generic_pm_domain *pd; + unsigned int i; int ret = 0; pd = pm_genpd_lookup_dev(dev); @@ -2139,10 +2143,12 @@ static void genpd_dev_pm_detach(struct device *dev, bool power_off) dev_dbg(dev, "removing from PM domain %s\n", pd->name); - while (1) { + for (i = 1; i < GENPD_RETRY_MAX_MS; i <<= 1) { ret = pm_genpd_remove_device(pd, dev); if (ret != -EAGAIN) break; + + mdelay(i); cond_resched(); } @@ -2183,6 +2189,7 @@ int genpd_dev_pm_attach(struct device *dev) { struct of_phandle_args pd_args; struct generic_pm_domain *pd; + unsigned int i; int ret; if (!dev->of_node) @@ -2218,10 +2225,12 @@ int genpd_dev_pm_attach(struct device *dev) dev_dbg(dev, "adding to PM domain %s\n", pd->name); - while (1) { + for (i = 1; i < GENPD_RETRY_MAX_MS; i <<= 1) { ret = pm_genpd_add_device(pd, dev); if (ret != -EAGAIN) break; + + mdelay(i); cond_resched(); } -- cgit From b51f9b103f58db2c5c0a20d6cee26c8bc255d3ae Mon Sep 17 00:00:00 2001 From: Uwe Geuder Date: Mon, 29 Jun 2015 23:35:05 +0300 Subject: PM / hibernate: clarify resume documentation it was not the whole truth that kernel mode cannot be used with swap on LVM Signed-off-by: Uwe Geuder Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- Documentation/power/swsusp.txt | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt index f732a8321e8a..8cc17ca71813 100644 --- a/Documentation/power/swsusp.txt +++ b/Documentation/power/swsusp.txt @@ -410,8 +410,17 @@ Documentation/usb/persist.txt. Q: Can I suspend-to-disk using a swap partition under LVM? -A: No. You can suspend successfully, but you'll not be able to -resume. uswsusp should be able to work with LVM. See suspend.sf.net. +A: Yes and No. You can suspend successfully, but the kernel will not be able +to resume on its own. You need an initramfs that can recognize the resume +situation, activate the logical volume containing the swap volume (but not +touch any filesystems!), and eventually call + +echo -n "$major:$minor" > /sys/power/resume + +where $major and $minor are the respective major and minor device numbers of +the swap volume. + +uswsusp works with LVM, too. See http://suspend.sourceforge.net/ Q: I upgraded the kernel from 2.6.15 to 2.6.16. Both kernels were compiled with the similar configuration files. Anyway I found that -- cgit From 26095a01d359827eeccec5459c28ddd976183179 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Tue, 7 Jul 2015 01:55:20 +0200 Subject: ACPI / scan: Add support for ACPI _CLS device matching Device drivers typically use ACPI _HIDs/_CIDs listed in struct device_driver acpi_match_table to match devices. However, for generic drivers, we do not want to list _HID for all supported devices. Also, certain classes of devices do not have _CID (e.g. SATA, USB). Instead, we can leverage ACPI _CLS, which specifies PCI-defined class code (i.e. base-class, subclass and programming interface). This patch adds support for matching ACPI devices using the _CLS method. To support loadable module, current design uses _HID or _CID to match device's modalias. With the new way of matching with _CLS this would requires modification to the current ACPI modalias key to include _CLS. This patch appends PCI-defined class-code to the existing ACPI modalias as following. acpi::::..::: E.g: # cat /sys/devices/platform/AMDI0600:00/modalias acpi:AMDI0600:010601: where bb is th base-class code, ss is te sub-class code, and pp is the programming interface code Since there would not be _HID/_CID in the ACPI matching table of the driver, this patch adds a field to acpi_device_id to specify the matching _CLS. static const struct acpi_device_id ahci_acpi_match[] = { { ACPI_DEVICE_CLASS(PCI_CLASS_STORAGE_SATA_AHCI, 0xffffff) }, {}, }; In this case, the corresponded entry in modules.alias file would be: alias acpi*:010601:* ahci_platform Acked-by: Mika Westerberg Reviewed-by: Hanjun Guo Signed-off-by: Suravee Suthikulpanit Signed-off-by: Rafael J. Wysocki --- drivers/acpi/scan.c | 32 ++++++++++++++++++++++++++++++-- include/linux/acpi.h | 14 ++++++++++++++ include/linux/mod_devicetable.h | 2 ++ scripts/mod/devicetable-offsets.c | 2 ++ scripts/mod/file2alias.c | 32 ++++++++++++++++++++++++++++++-- 5 files changed, 78 insertions(+), 4 deletions(-) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 2649a068671d..ec256352f423 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1019,6 +1019,29 @@ static bool acpi_of_match_device(struct acpi_device *adev, return false; } +static bool __acpi_match_device_cls(const struct acpi_device_id *id, + struct acpi_hardware_id *hwid) +{ + int i, msk, byte_shift; + char buf[3]; + + if (!id->cls) + return false; + + /* Apply class-code bitmask, before checking each class-code byte */ + for (i = 1; i <= 3; i++) { + byte_shift = 8 * (3 - i); + msk = (id->cls_msk >> byte_shift) & 0xFF; + if (!msk) + continue; + + sprintf(buf, "%02x", (id->cls >> byte_shift) & msk); + if (strncmp(buf, &hwid->id[(i - 1) * 2], 2)) + return false; + } + return true; +} + static const struct acpi_device_id *__acpi_match_device( struct acpi_device *device, const struct acpi_device_id *ids, @@ -1036,9 +1059,12 @@ static const struct acpi_device_id *__acpi_match_device( list_for_each_entry(hwid, &device->pnp.ids, list) { /* First, check the ACPI/PNP IDs provided by the caller. */ - for (id = ids; id->id[0]; id++) - if (!strcmp((char *) id->id, hwid->id)) + for (id = ids; id->id[0] || id->cls; id++) { + if (id->id[0] && !strcmp((char *) id->id, hwid->id)) return id; + else if (id->cls && __acpi_match_device_cls(id, hwid)) + return id; + } /* * Next, check ACPI_DT_NAMESPACE_HID and try to match the @@ -2101,6 +2127,8 @@ static void acpi_set_pnp_ids(acpi_handle handle, struct acpi_device_pnp *pnp, if (info->valid & ACPI_VALID_UID) pnp->unique_id = kstrdup(info->unique_id.string, GFP_KERNEL); + if (info->valid & ACPI_VALID_CLS) + acpi_add_id(pnp, info->class_code.string); kfree(info); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c471dfc93b71..bdfdb9f65c23 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -58,6 +58,19 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev) acpi_fwnode_handle(adev) : NULL) #define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev)) +/** + * ACPI_DEVICE_CLASS - macro used to describe an ACPI device with + * the PCI-defined class-code information + * + * @_cls : the class, subclass, prog-if triple for this device + * @_msk : the class mask for this device + * + * This macro is used to create a struct acpi_device_id that matches a + * specific PCI class. The .id and .driver_data fields will be left + * initialized with the default value. + */ +#define ACPI_DEVICE_CLASS(_cls, _msk) .cls = (_cls), .cls_msk = (_msk), + static inline bool has_acpi_companion(struct device *dev) { return is_acpi_node(dev->fwnode); @@ -446,6 +459,7 @@ struct platform_device *acpi_create_platform_device(struct acpi_device *); #define ACPI_COMPANION(dev) (NULL) #define ACPI_COMPANION_SET(dev, adev) do { } while (0) #define ACPI_HANDLE(dev) (NULL) +#define ACPI_DEVICE_CLASS(_cls, _msk) .cls = (0), .cls_msk = (0), struct fwnode_handle; diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 8183d6640ca7..34f25b7bf642 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -189,6 +189,8 @@ struct css_device_id { struct acpi_device_id { __u8 id[ACPI_ID_LEN]; kernel_ulong_t driver_data; + __u32 cls; + __u32 cls_msk; }; #define PNP_ID_LEN 8 diff --git a/scripts/mod/devicetable-offsets.c b/scripts/mod/devicetable-offsets.c index eff7de1fc82e..e70fcd12eeeb 100644 --- a/scripts/mod/devicetable-offsets.c +++ b/scripts/mod/devicetable-offsets.c @@ -63,6 +63,8 @@ int main(void) DEVID(acpi_device_id); DEVID_FIELD(acpi_device_id, id); + DEVID_FIELD(acpi_device_id, cls); + DEVID_FIELD(acpi_device_id, cls_msk); DEVID(pnp_device_id); DEVID_FIELD(pnp_device_id, id); diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c index 84c86f3cd6cd..5f2088209132 100644 --- a/scripts/mod/file2alias.c +++ b/scripts/mod/file2alias.c @@ -523,12 +523,40 @@ static int do_serio_entry(const char *filename, } ADD_TO_DEVTABLE("serio", serio_device_id, do_serio_entry); -/* looks like: "acpi:ACPI0003 or acpi:PNP0C0B" or "acpi:LNXVIDEO" */ +/* looks like: "acpi:ACPI0003" or "acpi:PNP0C0B" or "acpi:LNXVIDEO" or + * "acpi:bbsspp" (bb=base-class, ss=sub-class, pp=prog-if) + * + * NOTE: Each driver should use one of the following : _HID, _CIDs + * or _CLS. Also, bb, ss, and pp can be substituted with ?? + * as don't care byte. + */ static int do_acpi_entry(const char *filename, void *symval, char *alias) { DEF_FIELD_ADDR(symval, acpi_device_id, id); - sprintf(alias, "acpi*:%s:*", *id); + DEF_FIELD_ADDR(symval, acpi_device_id, cls); + DEF_FIELD_ADDR(symval, acpi_device_id, cls_msk); + + if (id && strlen((const char *)*id)) + sprintf(alias, "acpi*:%s:*", *id); + else if (cls) { + int i, byte_shift, cnt = 0; + unsigned int msk; + + sprintf(&alias[cnt], "acpi*:"); + cnt = 6; + for (i = 1; i <= 3; i++) { + byte_shift = 8 * (3-i); + msk = (*cls_msk >> byte_shift) & 0xFF; + if (msk) + sprintf(&alias[cnt], "%02x", + (*cls >> byte_shift) & 0xFF); + else + sprintf(&alias[cnt], "??"); + cnt += 2; + } + sprintf(&alias[cnt], ":*"); + } return 1; } ADD_TO_DEVTABLE("acpi", acpi_device_id, do_acpi_entry); -- cgit From 2051e9248688e4c36e4de2e93b88fa0d74e86261 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Tue, 7 Jul 2015 01:55:21 +0200 Subject: ata: ahci_platform: Add ACPI _CLS matching This patch adds ACPI supports for AHCI platform driver, which uses _CLS method to match the device. The following is an example of ASL structure in DSDT for a SATA controller, which contains _CLS package to be matched by the ahci_platform driver: Device (AHC0) // AHCI Controller { Name(_HID, "AMDI0600") Name (_CCA, 1) Name (_CLS, Package (3) { 0x01, // Base Class: Mass Storage 0x06, // Sub-Class: serial ATA 0x01, // Interface: AHCI }) Name (_CRS, ResourceTemplate () { Memory32Fixed (ReadWrite, 0xE0300000, 0x00010000) Interrupt (ResourceConsumer, Level, ActiveHigh, Exclusive,,,) { 387 } }) } Also, since ATA driver should not require PCI support for ATA_ACPI, this patch removes dependency in the driver/ata/Kconfig. Acked-by: Tejun Heo Acked-by: Mika Westerberg Reviewed-by: Hanjun Guo Signed-off-by: Suravee Suthikulpanit Signed-off-by: Rafael J. Wysocki --- drivers/ata/Kconfig | 2 +- drivers/ata/ahci_platform.c | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig index 6d17a3b65ef7..15e40ee62a94 100644 --- a/drivers/ata/Kconfig +++ b/drivers/ata/Kconfig @@ -48,7 +48,7 @@ config ATA_VERBOSE_ERROR config ATA_ACPI bool "ATA ACPI Support" - depends on ACPI && PCI + depends on ACPI default y help This option adds support for ATA-related ACPI objects. diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index 614c78f510f0..1befb114c384 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include "ahci.h" #define DRV_NAME "ahci" @@ -79,12 +81,19 @@ static const struct of_device_id ahci_of_match[] = { }; MODULE_DEVICE_TABLE(of, ahci_of_match); +static const struct acpi_device_id ahci_acpi_match[] = { + { ACPI_DEVICE_CLASS(PCI_CLASS_STORAGE_SATA_AHCI, 0xffffff) }, + {}, +}; +MODULE_DEVICE_TABLE(acpi, ahci_acpi_match); + static struct platform_driver ahci_driver = { .probe = ahci_probe, .remove = ata_platform_remove_one, .driver = { .name = DRV_NAME, .of_match_table = ahci_of_match, + .acpi_match_table = ahci_acpi_match, .pm = &ahci_pm_ops, }, }; -- cgit From b32aadc1a8ed84afbe924cd2ced31cd6a2e67074 Mon Sep 17 00:00:00 2001 From: "Shreyas B. Prabhu" Date: Tue, 7 Jul 2015 01:39:23 +0530 Subject: powerpc/powernv: Fix race in updating core_idle_state core_idle_state is maintained for each core. It uses 0-7 bits to track whether a thread in the core has entered fastsleep or winkle. 8th bit is used as a lock bit. The lock bit is set in these 2 scenarios- - The thread is first in subcore to wakeup from sleep/winkle. - If its the last thread in the core about to enter sleep/winkle While the lock bit is set, if any other thread in the core wakes up, it loops until the lock bit is cleared before proceeding in the wakeup path. This helps prevent race conditions w.r.t fastsleep workaround and prevents threads from switching to process context before core/subcore resources are restored. But, in the path to sleep/winkle entry, we currently don't check for lock-bit. This exposes us to following race when running with subcore on- First thread in the subcorea Another thread in the same waking up core entering sleep/winkle lwarx r15,0,r14 ori r15,r15,PNV_CORE_IDLE_LOCK_BIT stwcx. r15,0,r14 [Code to restore subcore state] lwarx r15,0,r14 [clear thread bit] stwcx. r15,0,r14 andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS stw r15,0(r14) Here, after the thread entering sleep clears its thread bit in core_idle_state, the value is overwritten by the thread waking up. In such cases when the core enters fastsleep, code mistakes an idle thread as running. Because of this, the first thread waking up from fastsleep which is supposed to resync timebase skips it. So we can end up having a core with stale timebase value. This patch fixes the above race by looping on the lock bit even while entering the idle states. Signed-off-by: Shreyas B. Prabhu Fixes: 7b54e9f213f76 'powernv/powerpc: Add winkle support for offline cpus' Cc: stable@vger.kernel.org # 3.19+ Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_power7.S | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S index ccde8f084ce4..112ccf497562 100644 --- a/arch/powerpc/kernel/idle_power7.S +++ b/arch/powerpc/kernel/idle_power7.S @@ -51,6 +51,22 @@ .text +/* + * Used by threads when the lock bit of core_idle_state is set. + * Threads will spin in HMT_LOW until the lock bit is cleared. + * r14 - pointer to core_idle_state + * r15 - used to load contents of core_idle_state + */ + +core_idle_lock_held: + HMT_LOW +3: lwz r15,0(r14) + andi. r15,r15,PNV_CORE_IDLE_LOCK_BIT + bne 3b + HMT_MEDIUM + lwarx r15,0,r14 + blr + /* * Pass requested state in r3: * r3 - PNV_THREAD_NAP/SLEEP/WINKLE @@ -150,6 +166,10 @@ power7_enter_nap_mode: ld r14,PACA_CORE_IDLE_STATE_PTR(r13) lwarx_loop1: lwarx r15,0,r14 + + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + bnel core_idle_lock_held + andc r15,r15,r7 /* Clear thread bit */ andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS @@ -294,7 +314,7 @@ lwarx_loop2: * workaround undo code or resyncing timebase or restoring context * In either case loop until the lock bit is cleared. */ - bne core_idle_lock_held + bnel core_idle_lock_held cmpwi cr2,r15,0 lbz r4,PACA_SUBCORE_SIBLING_MASK(r13) @@ -319,15 +339,6 @@ lwarx_loop2: isync b common_exit -core_idle_lock_held: - HMT_LOW -core_idle_lock_loop: - lwz r15,0(14) - andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT - bne core_idle_lock_loop - HMT_MEDIUM - b lwarx_loop2 - first_thread_in_subcore: /* First thread in subcore to wakeup */ ori r15,r15,PNV_CORE_IDLE_LOCK_BIT -- cgit From 747d34e7313034768aac83d679db43cedc5ab778 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Sat, 27 Jun 2015 23:11:44 +0200 Subject: clocksource/imx: Define clocksource for mx27 The rework of the imx clocksource driver missed to add an entry for imx27 which results in a boot failure on those machines. Add the proper CLOCKSOURCE_OF_DECLARE() entry for imx27 and map it to the imx21 init. Fixes: bef11c881ba5 'ARM: imx: initialize gpt device type for DT boot' Signed-off-by: Philippe Reynes Cc: linux-arm-kernel@lists.infradead.org Cc: daniel.lezcano@linaro.org Cc: fabio.estevam@freescale.com Cc: shawnguo@kernel.org Cc: kernel@pengutronix.de Link: http://lkml.kernel.org/r/1435439504-406-1-git-send-email-tremyfr@gmail.com Signed-off-by: Thomas Gleixner --- drivers/clocksource/timer-imx-gpt.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/clocksource/timer-imx-gpt.c b/drivers/clocksource/timer-imx-gpt.c index 879c78423546..2d59038dec43 100644 --- a/drivers/clocksource/timer-imx-gpt.c +++ b/drivers/clocksource/timer-imx-gpt.c @@ -529,6 +529,7 @@ static void __init imx6dl_timer_init_dt(struct device_node *np) CLOCKSOURCE_OF_DECLARE(imx1_timer, "fsl,imx1-gpt", imx1_timer_init_dt); CLOCKSOURCE_OF_DECLARE(imx21_timer, "fsl,imx21-gpt", imx21_timer_init_dt); +CLOCKSOURCE_OF_DECLARE(imx27_timer, "fsl,imx27-gpt", imx21_timer_init_dt); CLOCKSOURCE_OF_DECLARE(imx31_timer, "fsl,imx31-gpt", imx31_timer_init_dt); CLOCKSOURCE_OF_DECLARE(imx25_timer, "fsl,imx25-gpt", imx31_timer_init_dt); CLOCKSOURCE_OF_DECLARE(imx50_timer, "fsl,imx50-gpt", imx31_timer_init_dt); -- cgit From 7c4a976cd55972b68c75a978f171b6db5df4ce66 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 7 Jul 2015 10:14:35 +0200 Subject: clockevents: Allow set-state callbacks to be optional Its mandatory for the drivers to provide set_state_{oneshot|periodic}() (only if related modes are supported) and set_state_shutdown() callbacks today, if they are implementing the new set-state interface. But this leads to unnecessary noop callbacks for drivers which don't want to implement them. Over that, it will lead to a full function call for nothing really useful. Lets make all set-state callbacks optional. Suggested-by: Daniel Lezcano Signed-off-by: Viresh Kumar Signed-off-by: Daniel Lezcano Link: http://lkml.kernel.org/r/1436256875-15562-1-git-send-email-daniel.lezcano@linaro.org Signed-off-by: Thomas Gleixner --- kernel/time/clockevents.c | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c index 08ccc3da3ca0..50eb107f1198 100644 --- a/kernel/time/clockevents.c +++ b/kernel/time/clockevents.c @@ -120,19 +120,25 @@ static int __clockevents_switch_state(struct clock_event_device *dev, /* The clockevent device is getting replaced. Shut it down. */ case CLOCK_EVT_STATE_SHUTDOWN: - return dev->set_state_shutdown(dev); + if (dev->set_state_shutdown) + return dev->set_state_shutdown(dev); + return 0; case CLOCK_EVT_STATE_PERIODIC: /* Core internal bug */ if (!(dev->features & CLOCK_EVT_FEAT_PERIODIC)) return -ENOSYS; - return dev->set_state_periodic(dev); + if (dev->set_state_periodic) + return dev->set_state_periodic(dev); + return 0; case CLOCK_EVT_STATE_ONESHOT: /* Core internal bug */ if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT)) return -ENOSYS; - return dev->set_state_oneshot(dev); + if (dev->set_state_oneshot) + return dev->set_state_oneshot(dev); + return 0; case CLOCK_EVT_STATE_ONESHOT_STOPPED: /* Core internal bug */ @@ -471,18 +477,6 @@ static int clockevents_sanity_check(struct clock_event_device *dev) if (dev->features & CLOCK_EVT_FEAT_DUMMY) return 0; - /* New state-specific callbacks */ - if (!dev->set_state_shutdown) - return -EINVAL; - - if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) && - !dev->set_state_periodic) - return -EINVAL; - - if ((dev->features & CLOCK_EVT_FEAT_ONESHOT) && - !dev->set_state_oneshot) - return -EINVAL; - return 0; } -- cgit From 3f8dc44d88d3e86178eb9322c779c599f3745b21 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 7 Jul 2015 11:01:17 +1000 Subject: cxl: Fix refcounting in kernel API Currently the kernel API AFU dev refcounting is done on context start and stop. This patch moves this refcounting to context init and release, bringing it inline with how the userspace API does it. Without this we've seen the refcounting on the AFU get out of whack between the user and kernel API usage. This causes the AFU structures to be freed when they are actually still in use. This fixes some kref warnings we've been seeing and spurious ErrIVTE IRQs. Signed-off-by: Michael Neuling Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/api.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 0c77240ae2fc..729e0851167d 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -23,6 +23,7 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev *dev) afu = cxl_pci_to_afu(dev); + get_device(&afu->dev); ctx = cxl_context_alloc(); if (IS_ERR(ctx)) return ctx; @@ -31,6 +32,7 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev *dev) rc = cxl_context_init(ctx, afu, false, NULL); if (rc) { kfree(ctx); + put_device(&afu->dev); return ERR_PTR(-ENOMEM); } cxl_assign_psn_space(ctx); @@ -60,6 +62,8 @@ int cxl_release_context(struct cxl_context *ctx) if (ctx->status != CLOSED) return -EBUSY; + put_device(&ctx->afu->dev); + cxl_context_free(ctx); return 0; @@ -159,7 +163,6 @@ int cxl_start_context(struct cxl_context *ctx, u64 wed, } ctx->status = STARTED; - get_device(&ctx->afu->dev); out: mutex_unlock(&ctx->status_mutex); return rc; @@ -175,12 +178,7 @@ EXPORT_SYMBOL_GPL(cxl_process_element); /* Stop a context. Returns 0 on success, otherwise -Errno */ int cxl_stop_context(struct cxl_context *ctx) { - int rc; - - rc = __detach_context(ctx); - if (!rc) - put_device(&ctx->afu->dev); - return rc; + return __detach_context(ctx); } EXPORT_SYMBOL_GPL(cxl_stop_context); -- cgit From 5a3f75e3f02836518ce49536e9c460ca8e1fa290 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 5 Jul 2015 17:12:32 +0000 Subject: x86/irq: Plug irq vector hotplug race Jin debugged a nasty cpu hotplug race which results in leaking a irq vector on the newly hotplugged cpu. cpu N cpu M native_cpu_up device_shutdown do_boot_cpu free_msi_irqs start_secondary arch_teardown_msi_irqs smp_callin default_teardown_msi_irqs setup_vector_irq arch_teardown_msi_irq __setup_vector_irq native_teardown_msi_irq lock(vector_lock) destroy_irq install vectors unlock(vector_lock) lock(vector_lock) ---> __clear_irq_vector unlock(vector_lock) lock(vector_lock) set_cpu_online unlock(vector_lock) This leaves the irq vector(s) which are torn down on CPU M stale in the vector array of CPU N, because CPU M does not see CPU N online yet. There is a similar issue with concurrent newly setup interrupts. The alloc/free protection of irq descriptors does not prevent the above race, because it merily prevents interrupt descriptors from going away or changing concurrently. Prevent this by moving the call to setup_vector_irq() into the vector_lock held region which protects set_cpu_online(): cpu N cpu M native_cpu_up device_shutdown do_boot_cpu free_msi_irqs start_secondary arch_teardown_msi_irqs smp_callin default_teardown_msi_irqs lock(vector_lock) arch_teardown_msi_irq setup_vector_irq() __setup_vector_irq native_teardown_msi_irq install vectors destroy_irq set_cpu_online unlock(vector_lock) lock(vector_lock) __clear_irq_vector unlock(vector_lock) So cpu M either sees the cpu N online before clearing the vector or cpu N installs the vectors after cpu M has cleared it. Reported-by: xiao jin Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Joerg Roedel Cc: Borislav Petkov Cc: Yanmin Zhang Link: http://lkml.kernel.org/r/20150705171102.141898931@linutronix.de --- arch/x86/kernel/apic/vector.c | 10 ++-------- arch/x86/kernel/smpboot.c | 13 +++++-------- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 28eba2d38b15..f813261d9740 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -409,12 +409,6 @@ static void __setup_vector_irq(int cpu) int irq, vector; struct apic_chip_data *data; - /* - * vector_lock will make sure that we don't run into irq vector - * assignments that might be happening on another cpu in parallel, - * while we setup our initial vector to irq mappings. - */ - raw_spin_lock(&vector_lock); /* Mark the inuse vectors */ for_each_active_irq(irq) { data = apic_chip_data(irq_get_irq_data(irq)); @@ -436,16 +430,16 @@ static void __setup_vector_irq(int cpu) if (!cpumask_test_cpu(cpu, data->domain)) per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; } - raw_spin_unlock(&vector_lock); } /* - * Setup the vector to irq mappings. + * Setup the vector to irq mappings. Must be called with vector_lock held. */ void setup_vector_irq(int cpu) { int irq; + lockdep_assert_held(&vector_lock); /* * On most of the platforms, legacy PIC delivers the interrupts on the * boot cpu. But there are certain platforms where PIC interrupts are diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0bd8c1d507b3..d3010aa79daf 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -170,11 +170,6 @@ static void smp_callin(void) */ apic_ap_setup(); - /* - * Need to setup vector mappings before we enable interrupts. - */ - setup_vector_irq(smp_processor_id()); - /* * Save our processor parameters. Note: this information * is needed for clock calibration. @@ -239,11 +234,13 @@ static void notrace start_secondary(void *unused) check_tsc_sync_target(); /* - * We need to hold vector_lock so there the set of online cpus - * does not change while we are assigning vectors to cpus. Holding - * this lock ensures we don't half assign or remove an irq from a cpu. + * Lock vector_lock and initialize the vectors on this cpu + * before setting the cpu online. We must set it online with + * vector_lock held to prevent a concurrent setup/teardown + * from seeing a half valid vector space. */ lock_vector_lock(); + setup_vector_irq(smp_processor_id()); set_cpu_online(smp_processor_id(), true); unlock_vector_lock(); cpu_set_state_online(smp_processor_id()); -- cgit From cbb24dc761d95fe39a7a122bb1b298e9604cae15 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 5 Jul 2015 17:12:33 +0000 Subject: x86/irq: Use proper locking in check_irq_vectors_for_cpu_disable() It's unsafe to examine fields in the irq descriptor w/o holding the descriptor lock. Add proper locking. While at it add a comment why the vector check can run lock less Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: xiao jin Cc: Joerg Roedel Cc: Borislav Petkov Cc: Yanmin Zhang Link: http://lkml.kernel.org/r/20150705171102.236544164@linutronix.de --- arch/x86/kernel/irq.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 88b366487b0e..85ca76e6241c 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -347,14 +347,22 @@ int check_irq_vectors_for_cpu_disable(void) if (!desc) continue; + /* + * Protect against concurrent action removal, + * affinity changes etc. + */ + raw_spin_lock(&desc->lock); data = irq_desc_get_irq_data(desc); cpumask_copy(&affinity_new, data->affinity); cpumask_clear_cpu(this_cpu, &affinity_new); /* Do not count inactive or per-cpu irqs. */ - if (!irq_has_action(irq) || irqd_is_per_cpu(data)) + if (!irq_has_action(irq) || irqd_is_per_cpu(data)) { + raw_spin_unlock(&desc->lock); continue; + } + raw_spin_unlock(&desc->lock); /* * A single irq may be mapped to multiple * cpu's vector_irq[] (for example IOAPIC cluster @@ -385,6 +393,9 @@ int check_irq_vectors_for_cpu_disable(void) * vector. If the vector is marked in the used vectors * bitmap or an irq is assigned to it, we don't count * it as available. + * + * As this is an inaccurate snapshot anyway, we can do + * this w/o holding vector_lock. */ for (vector = FIRST_EXTERNAL_VECTOR; vector < first_system_vector; vector++) { -- cgit From 09cf92b784fae6109450c5d64f9908066d605249 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 5 Jul 2015 17:12:35 +0000 Subject: x86/irq: Retrieve irq data after locking irq_desc irq_data is protected by irq_desc->lock, so retrieving the irq chip from irq_data outside the lock is racy vs. an concurrent update. Move it into the lock held region. While at it add a comment why the vector walk does not require vector_lock. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: xiao jin Cc: Joerg Roedel Cc: Borislav Petkov Cc: Yanmin Zhang Link: http://lkml.kernel.org/r/20150705171102.331320612@linutronix.de --- arch/x86/kernel/irq.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 85ca76e6241c..c7dfe1be784e 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -497,6 +497,11 @@ void fixup_irqs(void) */ mdelay(1); + /* + * We can walk the vector array of this cpu without holding + * vector_lock because the cpu is already marked !online, so + * nothing else will touch it. + */ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { unsigned int irr; @@ -508,9 +513,9 @@ void fixup_irqs(void) irq = __this_cpu_read(vector_irq[vector]); desc = irq_to_desc(irq); + raw_spin_lock(&desc->lock); data = irq_desc_get_irq_data(desc); chip = irq_data_get_irq_chip(data); - raw_spin_lock(&desc->lock); if (chip->irq_retrigger) { chip->irq_retrigger(data); __this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED); -- cgit From 6d3dab7d84177f836b14961b4d252d0959d66768 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 7 Jul 2015 13:08:39 +0200 Subject: PM / wakeirq: Avoid setting power.wakeirq too hastily If dev_pm_attach_wake_irq() fails, the device's power.wakeirq field should not be set to point to the struct wake_irq passed to that function, as that object will be freed going forward. For this reason, make dev_pm_attach_wake_irq() first call device_wakeup_attach_irq() and only set the device's power.wakeirq field if that's successful. That requires device_wakeup_attach_irq() to be called under the device's power.lock lock, but since dev_pm_attach_wake_irq() is the only caller of it, the requisite changes are easy to make. Fixes: 4990d4fe327b (PM / Wakeirq: Add automated device wake IRQ handling) Reported-by: Felipe Balbi Tested-by: Tony Lindgren Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeirq.c | 12 +++++------- drivers/base/power/wakeup.c | 31 ++++++++++--------------------- 2 files changed, 15 insertions(+), 28 deletions(-) diff --git a/drivers/base/power/wakeirq.c b/drivers/base/power/wakeirq.c index 7470004ca810..eb6e67451dec 100644 --- a/drivers/base/power/wakeirq.c +++ b/drivers/base/power/wakeirq.c @@ -45,14 +45,12 @@ static int dev_pm_attach_wake_irq(struct device *dev, int irq, return -EEXIST; } - dev->power.wakeirq = wirq; - spin_unlock_irqrestore(&dev->power.lock, flags); - err = device_wakeup_attach_irq(dev, wirq); - if (err) - return err; + if (!err) + dev->power.wakeirq = wirq; - return 0; + spin_unlock_irqrestore(&dev->power.lock, flags); + return err; } /** @@ -105,10 +103,10 @@ void dev_pm_clear_wake_irq(struct device *dev) return; spin_lock_irqsave(&dev->power.lock, flags); + device_wakeup_detach_irq(dev); dev->power.wakeirq = NULL; spin_unlock_irqrestore(&dev->power.lock, flags); - device_wakeup_detach_irq(dev); if (wirq->dedicated_irq) free_irq(wirq->irq, wirq); kfree(wirq); diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index 7332ebc9cab0..15d27d782dc1 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -247,32 +247,25 @@ EXPORT_SYMBOL_GPL(device_wakeup_enable); * Attach a device wakeirq to the wakeup source so the device * wake IRQ can be configured automatically for suspend and * resume. + * + * Call under the device's power.lock lock. */ int device_wakeup_attach_irq(struct device *dev, struct wake_irq *wakeirq) { struct wakeup_source *ws; - int ret = 0; - spin_lock_irq(&dev->power.lock); ws = dev->power.wakeup; if (!ws) { dev_err(dev, "forgot to call call device_init_wakeup?\n"); - ret = -EINVAL; - goto unlock; + return -EINVAL; } - if (ws->wakeirq) { - ret = -EEXIST; - goto unlock; - } + if (ws->wakeirq) + return -EEXIST; ws->wakeirq = wakeirq; - -unlock: - spin_unlock_irq(&dev->power.lock); - - return ret; + return 0; } /** @@ -280,20 +273,16 @@ unlock: * @dev: Device to handle * * Removes a device wakeirq from the wakeup source. + * + * Call under the device's power.lock lock. */ void device_wakeup_detach_irq(struct device *dev) { struct wakeup_source *ws; - spin_lock_irq(&dev->power.lock); ws = dev->power.wakeup; - if (!ws) - goto unlock; - - ws->wakeirq = NULL; - -unlock: - spin_unlock_irq(&dev->power.lock); + if (ws) + ws->wakeirq = NULL; } /** -- cgit From b6cfb277378ef831c0fa84bcff5049307294adc6 Mon Sep 17 00:00:00 2001 From: Al Stone Date: Mon, 6 Jul 2015 17:16:47 -0600 Subject: ACPI / ARM64: add BAD_MADT_GICC_ENTRY() macro The BAD_MADT_ENTRY() macro is designed to work for all of the subtables of the MADT. In the ACPI 5.1 version of the spec, the struct for the GICC subtable (struct acpi_madt_generic_interrupt) is 76 bytes long; in ACPI 6.0, the struct is 80 bytes long. But, there is only one definition in ACPICA for this struct -- and that is the 6.0 version. Hence, when BAD_MADT_ENTRY() compares the struct size to the length in the GICC subtable, it fails if 5.1 structs are in use, and there are systems in the wild that have them. This patch adds the BAD_MADT_GICC_ENTRY() that checks the GICC subtable only, accounting for the difference in specification versions that are possible. The BAD_MADT_ENTRY() will continue to work as is for all other MADT subtables. This code is being added to an arm64 header file since that is currently the only architecture using the GICC subtable of the MADT. As a GIC is specific to ARM, it is also unlikely the subtable will be used elsewhere. Fixes: aeb823bbacc2 ("ACPICA: ACPI 6.0: Add changes for FADT table.") Signed-off-by: Al Stone Acked-by: Will Deacon Acked-by: "Rafael J. Wysocki" [catalin.marinas@arm.com: extra brackets around macro arguments] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/acpi.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 39248d3adf5d..406485ed110a 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -19,6 +19,14 @@ #include #include +/* Macros for consistency checks of the GICC subtable of MADT */ +#define ACPI_MADT_GICC_LENGTH \ + (acpi_gbl_FADT.header.revision < 6 ? 76 : 80) + +#define BAD_MADT_GICC_ENTRY(entry, end) \ + (!(entry) || (unsigned long)(entry) + sizeof(*(entry)) > (end) || \ + (entry)->header.length != ACPI_MADT_GICC_LENGTH) + /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI /* ACPI table mapping after acpi_gbl_permanent_mmap is set */ -- cgit From 99e3e3ae332b6ca91d5c444ea7849f367f5e5a76 Mon Sep 17 00:00:00 2001 From: Al Stone Date: Mon, 6 Jul 2015 17:16:48 -0600 Subject: ACPI / ARM64 : use the new BAD_MADT_GICC_ENTRY macro For those parts of the arm64 ACPI code that need to check GICC subtables in the MADT, use the new BAD_MADT_GICC_ENTRY macro instead of the previous BAD_MADT_ENTRY. The new macro takes into account differences in the size of the GICC subtable that the old macro did not; this caused failures even though the subtable entries are valid. Fixes: aeb823bbacc2 ("ACPICA: ACPI 6.0: Add changes for FADT table.") Signed-off-by: Al Stone Reviewed-by: Hanjun Guo Acked-by: Will Deacon Acked-by: "Rafael J. Wysocki" Signed-off-by: Catalin Marinas --- arch/arm64/kernel/smp.c | 2 +- drivers/irqchip/irq-gic.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 695801a54ca5..50fb4696654e 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -438,7 +438,7 @@ acpi_parse_gic_cpu_interface(struct acpi_subtable_header *header, struct acpi_madt_generic_interrupt *processor; processor = (struct acpi_madt_generic_interrupt *)header; - if (BAD_MADT_ENTRY(processor, end)) + if (BAD_MADT_GICC_ENTRY(processor, end)) return -EINVAL; acpi_table_print_madt_entry(header); diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 8d7e1c8b6d56..4dd88264dff5 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -1055,7 +1055,7 @@ gic_acpi_parse_madt_cpu(struct acpi_subtable_header *header, processor = (struct acpi_madt_generic_interrupt *)header; - if (BAD_MADT_ENTRY(processor, end)) + if (BAD_MADT_GICC_ENTRY(processor, end)) return -EINVAL; /* -- cgit From ef37566cf8d607844cee5a01597d828c8fd1a501 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 7 Jul 2015 17:15:39 +0100 Subject: arm64: Keep the ARM64 Kconfig selects sorted Move EDAC_SUPPORT to the right place. Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0f6edb14b7e4..318175f62c24 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -23,9 +23,9 @@ config ARM64 select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK - select EDAC_SUPPORT select CPU_PM if (SUSPEND || CPU_IDLE) select DCACHE_WORD_ACCESS + select EDAC_SUPPORT select GENERIC_ALLOCATOR select GENERIC_CLOCKEVENTS select GENERIC_CLOCKEVENTS_BROADCAST if SMP -- cgit From 8eb231261fdd20768db23863d00ef277de4b0543 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 14:11:00 +0200 Subject: tick/broadcast: Prevent hrtimer recursion The hrtimer based broadcast vehicle can cause a hrtimer recursion which went unnoticed until we changed the hrtimer expiry code to keep track of the currently running timer. local_timer_interrupt() local_handler() hrtimer_interrupt() expire_hrtimers() broadcast_hrtimer() send_ipis() local_handler() hrtimer_interrupt() .... Solution is simple: Prevent the local handler call from the broadcast code when the broadcast 'device' is hrtimer based. [ Split out from a larger combo patch ] Tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- kernel/time/tick-broadcast.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index d39f32cdd1b5..a76204089f78 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -265,8 +265,22 @@ static bool tick_do_broadcast(struct cpumask *mask) * Check, if the current cpu is in the mask */ if (cpumask_test_cpu(cpu, mask)) { + struct clock_event_device *bc = tick_broadcast_device.evtdev; + cpumask_clear_cpu(cpu, mask); - local = true; + /* + * We only run the local handler, if the broadcast + * device is not hrtimer based. Otherwise we run into + * a hrtimer recursion. + * + * local timer_interrupt() + * local_handler() + * expire_hrtimers() + * bc_handler() + * local_handler() + * expire_hrtimers() + */ + local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER); } if (!cpumask_empty(mask)) { -- cgit From e0454311903d3fd0f12a86c9e65d7b271c2bb05d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 14:07:27 +0200 Subject: tick/broadcast: Sanity check the shutdown of the local clock_event The broadcast code shuts down the local clock event unconditionally even if no broadcast device is installed or if the broadcast device is hrtimer based. Add proper sanity checks. [ Split out from a larger combo patch ] Reported-and-tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- kernel/time/tick-broadcast.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index a76204089f78..9877d0b0aefc 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -159,7 +159,7 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { struct clock_event_device *bc = tick_broadcast_device.evtdev; unsigned long flags; - int ret; + int ret = 0; raw_spin_lock_irqsave(&tick_broadcast_lock, flags); @@ -221,13 +221,14 @@ int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) * If we kept the cpu in the broadcast mask, * tell the caller to leave the per cpu device * in shutdown state. The periodic interrupt - * is delivered by the broadcast device. + * is delivered by the broadcast device, if + * the broadcast device exists and is not + * hrtimer based. */ - ret = cpumask_test_cpu(cpu, tick_broadcast_mask); + if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER)) + ret = cpumask_test_cpu(cpu, tick_broadcast_mask); break; default: - /* Nothing to do */ - ret = 0; break; } } @@ -373,8 +374,16 @@ void tick_broadcast_control(enum tick_broadcast_mode mode) case TICK_BROADCAST_ON: cpumask_set_cpu(cpu, tick_broadcast_on); if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) { - if (tick_broadcast_device.mode == - TICKDEV_MODE_PERIODIC) + /* + * Only shutdown the cpu local device, if: + * + * - the broadcast device exists + * - the broadcast device is not a hrtimer based one + * - the broadcast device is in periodic mode to + * avoid a hickup during switch to oneshot mode + */ + if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) && + tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) clockevents_shutdown(dev); } break; -- cgit From f32dd117051185da6e923b35491a44d7debeeea5 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 16:29:38 +0200 Subject: tick/broadcast: Make idle check independent from mode and config Currently the broadcast busy check, which prevents the idle code from going into deep idle, works only in one shot mode. If NOHZ and HIGHRES are off (config or command line) there is no sanity check at all, so under certain conditions cpus are allowed to go into deep idle, where the local timer stops, and are not woken up again because there is no broadcast timer installed or a hrtimer based broadcast device is not evaluated. Move tick_broadcast_oneshot_control() into the common code and provide proper subfunctions for the various config combinations. The common check in tick_broadcast_oneshot_control() is for the C3STOP misfeature flag of the local clock event device. If its not set, idle can proceed. If set, further checks are necessary. Provide checks for the trivial cases: - If broadcast is disabled in the config, then return busy - If oneshot mode (NOHZ/HIGHES) is disabled in the config, return busy if the broadcast device is hrtimer based. - If oneshot mode is enabled in the config call the original tick_broadcast_oneshot_control() function. That function needs extra checks which will be implemented in seperate patches. [ Split out from a larger combo patch ] Reported-and-tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- include/linux/tick.h | 4 ---- kernel/time/tick-broadcast.c | 26 +++++++++++--------------- kernel/time/tick-common.c | 21 +++++++++++++++++++++ kernel/time/tick-sched.h | 10 ++++++++++ 4 files changed, 42 insertions(+), 19 deletions(-) diff --git a/include/linux/tick.h b/include/linux/tick.h index 3741ba1a652c..6916dcb61857 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -67,11 +67,7 @@ extern void tick_broadcast_control(enum tick_broadcast_mode mode); static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { } #endif /* BROADCAST */ -#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT) extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state); -#else -static inline int tick_broadcast_oneshot_control(enum tick_broadcast_state state) { return 0; } -#endif static inline void tick_broadcast_enable(void) { diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 9877d0b0aefc..ef77b16ad5df 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -685,18 +685,7 @@ static void broadcast_shutdown_local(struct clock_event_device *bc, clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN); } -/** - * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode - * @state: The target state (enter/exit) - * - * The system enters/leaves a state, where affected devices might stop - * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups. - * - * Called with interrupts disabled, so clockevents_lock is not - * required here because the local clock event device cannot go away - * under us. - */ -int tick_broadcast_oneshot_control(enum tick_broadcast_state state) +int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) { struct clock_event_device *bc, *dev; struct tick_device *td; @@ -717,9 +706,6 @@ int tick_broadcast_oneshot_control(enum tick_broadcast_state state) td = this_cpu_ptr(&tick_cpu_device); dev = td->evtdev; - if (!(dev->features & CLOCK_EVT_FEAT_C3STOP)) - return 0; - raw_spin_lock(&tick_broadcast_lock); bc = tick_broadcast_device.evtdev; cpu = smp_processor_id(); @@ -961,6 +947,16 @@ bool tick_broadcast_oneshot_available(void) return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false; } +#else +int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) +{ + struct clock_event_device *bc = tick_broadcast_device.evtdev; + + if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER)) + return -EBUSY; + + return 0; +} #endif void __init tick_broadcast_init(void) diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 76446cb5dfe1..55e13efff1ab 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -343,6 +343,27 @@ out_bc: tick_install_broadcast_device(newdev); } +/** + * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode + * @state: The target state (enter/exit) + * + * The system enters/leaves a state, where affected devices might stop + * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups. + * + * Called with interrupts disabled, so clockevents_lock is not + * required here because the local clock event device cannot go away + * under us. + */ +int tick_broadcast_oneshot_control(enum tick_broadcast_state state) +{ + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); + + if (!(td->evtdev->features & CLOCK_EVT_FEAT_C3STOP)) + return 0; + + return __tick_broadcast_oneshot_control(state); +} + #ifdef CONFIG_HOTPLUG_CPU /* * Transfer the do_timer job away from a dying cpu. diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h index 42fdf4958bcc..a4a8d4e9baa1 100644 --- a/kernel/time/tick-sched.h +++ b/kernel/time/tick-sched.h @@ -71,4 +71,14 @@ extern void tick_cancel_sched_timer(int cpu); static inline void tick_cancel_sched_timer(int cpu) { } #endif +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +extern int __tick_broadcast_oneshot_control(enum tick_broadcast_state state); +#else +static inline int +__tick_broadcast_oneshot_control(enum tick_broadcast_state state) +{ + return -EBUSY; +} +#endif + #endif -- cgit From b78f3f3c898c824bf56ab55cfa59fc72be49c349 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 16:34:32 +0200 Subject: tick/broadcast: Prevent deep idle if no broadcast device available Add a check for a installed broadcast device to the oneshot control function and return busy if not. [ Split out from a larger combo patch ] Reported-and-tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- kernel/time/tick-broadcast.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index ef77b16ad5df..fad3f789beec 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -692,6 +692,13 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) int cpu, ret = 0; ktime_t now; + /* + * If there is no broadcast device, tell the caller not to go + * into deep idle. + */ + if (!tick_broadcast_device.evtdev) + return -EBUSY; + /* * Periodic mode does not care about the enter/exit of power * states -- cgit From e3ac79e087ffe8a1f953ed44a74acf7616cb0b25 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 16:38:11 +0200 Subject: tick/broadcast: Move the check for periodic mode inside state handling We need to check more than the periodic mode for proper operation in all runtime combinations. To avoid code duplication move the check into the enter state handling. No functional change. [ Split out from a larger combo patch ] Reported-and-tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- kernel/time/tick-broadcast.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index fad3f789beec..83aa92e87a85 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -688,7 +688,6 @@ static void broadcast_shutdown_local(struct clock_event_device *bc, int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) { struct clock_event_device *bc, *dev; - struct tick_device *td; int cpu, ret = 0; ktime_t now; @@ -699,25 +698,20 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) if (!tick_broadcast_device.evtdev) return -EBUSY; - /* - * Periodic mode does not care about the enter/exit of power - * states - */ - if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) - return 0; - - /* - * We are called with preemtion disabled from the depth of the - * idle code, so we can't be moved away. - */ - td = this_cpu_ptr(&tick_cpu_device); - dev = td->evtdev; + dev = this_cpu_ptr(&tick_cpu_device)->evtdev; raw_spin_lock(&tick_broadcast_lock); bc = tick_broadcast_device.evtdev; cpu = smp_processor_id(); if (state == TICK_BROADCAST_ENTER) { + /* + * If the broadcast device is in periodic mode, we + * return. + */ + if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) + goto out; + if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); broadcast_shutdown_local(bc, dev); -- cgit From d33257264b0267a8fd20f6717abbb484c9e21130 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 17:45:22 +0200 Subject: tick/broadcast: Return busy if periodic mode and hrtimer broadcast If the system is in periodic mode and the broadcast device is hrtimer based, return busy as we have no proper handling for this. [ Split out from a larger combo patch ] Tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- kernel/time/tick-broadcast.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 83aa92e87a85..da7b40fdf6d0 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -709,8 +709,12 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) * If the broadcast device is in periodic mode, we * return. */ - if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) + if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) { + /* If it is a hrtimer based broadcast, return busy */ + if (bc->features & CLOCK_EVT_FEAT_HRTIMER) + ret = -EBUSY; goto out; + } if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); -- cgit From 0cc5281aa592d0020868f6ccaed359b4ad7b2684 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 16:45:15 +0200 Subject: tick/broadcast: Return busy when IPI is pending Tell the idle code not to go deep if the broadcast IPI is about to arrive. [ Split out from a larger combo patch ] Tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- kernel/time/tick-broadcast.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index da7b40fdf6d0..70b47bc928b9 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -725,11 +725,15 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) * if the cpu local event is earlier than the * broadcast event. If the current CPU is in * the force mask, then we are going to be - * woken by the IPI right away. + * woken by the IPI right away; we return + * busy, so the CPU does not try to go deep + * idle. */ - if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) && - dev->next_event.tv64 < bc->next_event.tv64) + if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) { + ret = -EBUSY; + } else if (dev->next_event.tv64 < bc->next_event.tv64) { tick_broadcast_set_event(bc, cpu, dev->next_event); + } } /* * If the current CPU owns the hrtimer broadcast -- cgit From d5113e13a550bc9c2b53cc9944b8a06453c4a0a1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 16:43:04 +0200 Subject: tick/broadcast: Check for hrtimer broadcast active early If the current cpu is the one which has the hrtimer based broadcast queued then we better return busy immediately instead of going through loops and hoops to figure that out. [ Split out from a larger combo patch ] Tested-by: Sudeep Holla Signed-off-by: Thomas Gleixner Cc: Suzuki Poulose Cc: Lorenzo Pieralisi Cc: Catalin Marinas Cc: Rafael J. Wysocki Cc: Peter Zijlstra Cc: Preeti U Murthy Cc: Ingo Molnar Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1507070929360.3916@nanos --- kernel/time/tick-broadcast.c | 36 ++++++++++++++++++++++++++---------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 70b47bc928b9..c8d731ac9563 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -705,6 +705,17 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) cpu = smp_processor_id(); if (state == TICK_BROADCAST_ENTER) { + /* + * If the current CPU owns the hrtimer broadcast + * mechanism, it cannot go deep idle and we do not add + * the CPU to the broadcast mask. We don't have to go + * through the EXIT path as the local timer is not + * shutdown. + */ + ret = broadcast_needs_cpu(bc, cpu); + if (ret) + goto out; + /* * If the broadcast device is in periodic mode, we * return. @@ -718,7 +729,10 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) { WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask)); + + /* Conditionally shut down the local timer. */ broadcast_shutdown_local(bc, dev); + /* * We only reprogram the broadcast timer if we * did not mark ourself in the force mask and @@ -733,18 +747,20 @@ int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) ret = -EBUSY; } else if (dev->next_event.tv64 < bc->next_event.tv64) { tick_broadcast_set_event(bc, cpu, dev->next_event); + /* + * In case of hrtimer broadcasts the + * programming might have moved the + * timer to this cpu. If yes, remove + * us from the broadcast mask and + * return busy. + */ + ret = broadcast_needs_cpu(bc, cpu); + if (ret) { + cpumask_clear_cpu(cpu, + tick_broadcast_oneshot_mask); + } } } - /* - * If the current CPU owns the hrtimer broadcast - * mechanism, it cannot go deep idle and we remove the - * CPU from the broadcast mask. We don't have to go - * through the EXIT path as the local timer is not - * shutdown. - */ - ret = broadcast_needs_cpu(bc, cpu); - if (ret) - cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); } else { if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) { clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT); -- cgit From c4288334818c81c946acb23d2319881f58c3d497 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 5 Jul 2015 20:53:17 +0000 Subject: tick/broadcast: Handle spurious interrupts gracefully Andriy reported that on a virtual machine the warning about negative expiry time in the clock events programming code triggered: hpet: hpet0 irq 40 for MSI hpet: hpet1 irq 41 for MSI Switching to clocksource hpet WARNING: at kernel/time/clockevents.c:239 [] clockevents_program_event+0xdb/0xf0 [] tick_handle_periodic_broadcast+0x41/0x50 [] timer_interrupt+0x15/0x20 When the second hpet is installed as a per cpu timer the broadcast event is not longer required and stopped, which sets the next_evt of the broadcast device to KTIME_MAX. If after that a spurious interrupt happens on the broadcast device, then the current code blindly handles it and tries to reprogram the broadcast device afterwards, which adds the period to next_evt. KTIME_MAX + period results in a negative expiry value causing the WARN_ON in the clockevents code to trigger. Add a proper check for the state of the broadcast device into the interrupt handler and return if the interrupt is spurious. [ Folded in pointer fix from Sudeep ] Reported-by: Andriy Gapon Signed-off-by: Thomas Gleixner Cc: Sudeep Holla Cc: Peter Zijlstra Cc: Preeti U Murthy Link: http://lkml.kernel.org/r/20150705205221.802094647@linutronix.de --- kernel/time/tick-broadcast.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index c8d731ac9563..ee3cf942d6eb 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -316,6 +316,13 @@ static void tick_handle_periodic_broadcast(struct clock_event_device *dev) bool bc_local; raw_spin_lock(&tick_broadcast_lock); + + /* Handle spurious interrupts gracefully */ + if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) { + raw_spin_unlock(&tick_broadcast_lock); + return; + } + bc_local = tick_do_periodic_broadcast(); if (clockevent_state_oneshot(dev)) { -- cgit From 539c4b88146cc320aee18b08ebb43ab57d523dcc Mon Sep 17 00:00:00 2001 From: duson Date: Tue, 7 Jul 2015 10:25:39 -0700 Subject: Input: elan_i2c - change the hover event from MT to ST The firmware only reports hover condition while the very first contact is approaching the surface; the hover is not reported for the subsequent contacts. Therefore we should not be using ABS_MT_DISTANCE to report hover but rather its single-touch counterpart ABS_DISTANCE. Signed-off-by: Duson Lin Signed-off-by: Dmitry Torokhov --- drivers/input/mouse/elan_i2c_core.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 62641f2adaf7..5b5f403d8ce6 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -771,7 +771,7 @@ static const struct attribute_group *elan_sysfs_groups[] = { */ static void elan_report_contact(struct elan_tp_data *data, int contact_num, bool contact_valid, - bool hover_event, u8 *finger_data) + u8 *finger_data) { struct input_dev *input = data->input; unsigned int pos_x, pos_y; @@ -815,9 +815,7 @@ static void elan_report_contact(struct elan_tp_data *data, input_mt_report_slot_state(input, MT_TOOL_FINGER, true); input_report_abs(input, ABS_MT_POSITION_X, pos_x); input_report_abs(input, ABS_MT_POSITION_Y, data->max_y - pos_y); - input_report_abs(input, ABS_MT_DISTANCE, hover_event); - input_report_abs(input, ABS_MT_PRESSURE, - hover_event ? 0 : scaled_pressure); + input_report_abs(input, ABS_MT_PRESSURE, scaled_pressure); input_report_abs(input, ABS_TOOL_WIDTH, mk_x); input_report_abs(input, ABS_MT_TOUCH_MAJOR, major); input_report_abs(input, ABS_MT_TOUCH_MINOR, minor); @@ -839,14 +837,14 @@ static void elan_report_absolute(struct elan_tp_data *data, u8 *packet) hover_event = hover_info & 0x40; for (i = 0; i < ETP_MAX_FINGERS; i++) { contact_valid = tp_info & (1U << (3 + i)); - elan_report_contact(data, i, contact_valid, hover_event, - finger_data); + elan_report_contact(data, i, contact_valid, finger_data); if (contact_valid) finger_data += ETP_FINGER_DATA_LEN; } input_report_key(input, BTN_LEFT, tp_info & 0x01); + input_report_abs(input, ABS_DISTANCE, hover_event != 0); input_mt_report_pointer_emulation(input, true); input_sync(input); } @@ -922,6 +920,7 @@ static int elan_setup_input_device(struct elan_tp_data *data) input_abs_set_res(input, ABS_Y, data->y_res); input_set_abs_params(input, ABS_PRESSURE, 0, ETP_MAX_PRESSURE, 0, 0); input_set_abs_params(input, ABS_TOOL_WIDTH, 0, ETP_FINGER_WIDTH, 0, 0); + input_set_abs_params(input, ABS_DISTANCE, 0, 1, 0, 0); /* And MT parameters */ input_set_abs_params(input, ABS_MT_POSITION_X, 0, data->max_x, 0, 0); @@ -934,7 +933,6 @@ static int elan_setup_input_device(struct elan_tp_data *data) ETP_FINGER_WIDTH * max_width, 0, 0); input_set_abs_params(input, ABS_MT_TOUCH_MINOR, 0, ETP_FINGER_WIDTH * min_width, 0, 0); - input_set_abs_params(input, ABS_MT_DISTANCE, 0, 1, 0, 0); data->input = input; -- cgit From 71eeedcf51544831ae356a773814401143ed32d4 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 7 Jul 2015 20:49:03 +0200 Subject: MIPS: Lemote 2F: Fix build caused by recent mass rename. CC arch/mips/loongson64/lemote-2f/clock.o /home/ralf/src/linux/linux-mips/arch/mips/loongson64/lemote-2f/clock.c:18:40: fatal error: asm/mach-loongson/loongson.h: No such file or directory #include ^ compilation terminated. Signed-off-by: Ralf Baechle --- arch/mips/loongson64/lemote-2f/clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/loongson64/lemote-2f/clock.c b/arch/mips/loongson64/lemote-2f/clock.c index 462e34d46b4a..4c5b94af3643 100644 --- a/arch/mips/loongson64/lemote-2f/clock.c +++ b/arch/mips/loongson64/lemote-2f/clock.c @@ -15,7 +15,7 @@ #include #include -#include +#include static LIST_HEAD(clock_list); static DEFINE_SPINLOCK(clock_lock); -- cgit From 0bb383a2d8f51e32ecc156f3f84067420ffe6d20 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Tue, 7 Jul 2015 20:56:04 +0200 Subject: MIPS, CPUFREQ: Fix spelling of Institute. Signed-off-by: Ralf Baechle --- arch/mips/include/asm/mach-loongson64/mmzone.h | 2 +- arch/mips/loongson64/common/bonito-irq.c | 2 +- arch/mips/loongson64/common/cmdline.c | 2 +- arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c | 2 +- arch/mips/loongson64/common/env.c | 2 +- arch/mips/loongson64/common/irq.c | 2 +- arch/mips/loongson64/common/setup.c | 2 +- arch/mips/loongson64/fuloong-2e/irq.c | 2 +- arch/mips/loongson64/lemote-2f/clock.c | 2 +- arch/mips/loongson64/loongson-3/numa.c | 2 +- drivers/cpufreq/loongson2_cpufreq.c | 2 +- 11 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/mips/include/asm/mach-loongson64/mmzone.h b/arch/mips/include/asm/mach-loongson64/mmzone.h index 37c08a27b4f0..c9f7e231e66b 100644 --- a/arch/mips/include/asm/mach-loongson64/mmzone.h +++ b/arch/mips/include/asm/mach-loongson64/mmzone.h @@ -1,6 +1,6 @@ /* * Copyright (C) 2010 Loongson Inc. & Lemote Inc. & - * Insititute of Computing Technology + * Institute of Computing Technology * Author: Xiang Gao, gaoxiang@ict.ac.cn * Huacai Chen, chenhc@lemote.com * Xiaofu Meng, Shuangshuang Zhang diff --git a/arch/mips/loongson64/common/bonito-irq.c b/arch/mips/loongson64/common/bonito-irq.c index cc0e4fd548e6..4e116d23bab3 100644 --- a/arch/mips/loongson64/common/bonito-irq.c +++ b/arch/mips/loongson64/common/bonito-irq.c @@ -3,7 +3,7 @@ * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net * Copyright (C) 2000, 2001 Ralf Baechle (ralf@gnu.org) * - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * This program is free software; you can redistribute it and/or modify it diff --git a/arch/mips/loongson64/common/cmdline.c b/arch/mips/loongson64/common/cmdline.c index 72fed003a536..01fbed137028 100644 --- a/arch/mips/loongson64/common/cmdline.c +++ b/arch/mips/loongson64/common/cmdline.c @@ -6,7 +6,7 @@ * Copyright 2003 ICT CAS * Author: Michael Guo * - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * Copyright (C) 2009 Lemote Inc. diff --git a/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c b/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c index 12c75db23420..875037063a80 100644 --- a/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c +++ b/arch/mips/loongson64/common/cs5536/cs5536_mfgpt.c @@ -1,7 +1,7 @@ /* * CS5536 General timer functions * - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Yanhua, yanh@lemote.com * * Copyright (C) 2009 Lemote Inc. diff --git a/arch/mips/loongson64/common/env.c b/arch/mips/loongson64/common/env.c index 22f04ca2ff3e..f6c44dd332e2 100644 --- a/arch/mips/loongson64/common/env.c +++ b/arch/mips/loongson64/common/env.c @@ -6,7 +6,7 @@ * Copyright 2003 ICT CAS * Author: Michael Guo * - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * Copyright (C) 2009 Lemote Inc. diff --git a/arch/mips/loongson64/common/irq.c b/arch/mips/loongson64/common/irq.c index 687003b19b45..d36d969a4a87 100644 --- a/arch/mips/loongson64/common/irq.c +++ b/arch/mips/loongson64/common/irq.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * This program is free software; you can redistribute it and/or modify it diff --git a/arch/mips/loongson64/common/setup.c b/arch/mips/loongson64/common/setup.c index d477dd6bb326..2dc5122f0e09 100644 --- a/arch/mips/loongson64/common/setup.c +++ b/arch/mips/loongson64/common/setup.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * This program is free software; you can redistribute it and/or modify it diff --git a/arch/mips/loongson64/fuloong-2e/irq.c b/arch/mips/loongson64/fuloong-2e/irq.c index ef5ec8f3de5f..892963f860b7 100644 --- a/arch/mips/loongson64/fuloong-2e/irq.c +++ b/arch/mips/loongson64/fuloong-2e/irq.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2007 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2007 Lemote Inc. & Institute of Computing Technology * Author: Fuxin Zhang, zhangfx@lemote.com * * This program is free software; you can redistribute it and/or modify it diff --git a/arch/mips/loongson64/lemote-2f/clock.c b/arch/mips/loongson64/lemote-2f/clock.c index 4c5b94af3643..a78fb657068c 100644 --- a/arch/mips/loongson64/lemote-2f/clock.c +++ b/arch/mips/loongson64/lemote-2f/clock.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2006 - 2008 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2006 - 2008 Lemote Inc. & Institute of Computing Technology * Author: Yanhua, yanh@lemote.com * * This file is subject to the terms and conditions of the GNU General Public diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c index 12d14ed48778..6f9e010cec4d 100644 --- a/arch/mips/loongson64/loongson-3/numa.c +++ b/arch/mips/loongson64/loongson-3/numa.c @@ -1,6 +1,6 @@ /* * Copyright (C) 2010 Loongson Inc. & Lemote Inc. & - * Insititute of Computing Technology + * Institute of Computing Technology * Author: Xiang Gao, gaoxiang@ict.ac.cn * Huacai Chen, chenhc@lemote.com * Xiaofu Meng, Shuangshuang Zhang diff --git a/drivers/cpufreq/loongson2_cpufreq.c b/drivers/cpufreq/loongson2_cpufreq.c index fc897babab55..e362860c2b50 100644 --- a/drivers/cpufreq/loongson2_cpufreq.c +++ b/drivers/cpufreq/loongson2_cpufreq.c @@ -3,7 +3,7 @@ * * The 2E revision of loongson processor not support this feature. * - * Copyright (C) 2006 - 2008 Lemote Inc. & Insititute of Computing Technology + * Copyright (C) 2006 - 2008 Lemote Inc. & Institute of Computing Technology * Author: Yanhua, yanh@lemote.com * * This file is subject to the terms and conditions of the GNU General Public -- cgit From 37b64a42067a04a22468c4e52c12af00d72e462b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Jul 2015 21:56:34 +0200 Subject: tick/broadcast: Unbreak CONFIG_GENERIC_CLOCKEVENTS=n build Making tick_broadcast_oneshot_control() independent from CONFIG_GENERIC_CLOCKEVENTS_BROADCAST broke the build for CONFIG_GENERIC_CLOCKEVENTS=n because the function is not defined there. Provide a proper stub inline. Fixes: f32dd1170511 'tick/broadcast: Make idle check independent from mode and config' Reported-by: kbuild test robot Signed-off-by: Thomas Gleixner --- include/linux/tick.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/tick.h b/include/linux/tick.h index 6916dcb61857..edbfc9a5293e 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -67,7 +67,14 @@ extern void tick_broadcast_control(enum tick_broadcast_mode mode); static inline void tick_broadcast_control(enum tick_broadcast_mode mode) { } #endif /* BROADCAST */ +#ifdef CONFIG_GENERIC_CLOCKEVENTS extern int tick_broadcast_oneshot_control(enum tick_broadcast_state state); +#else +static inline int tick_broadcast_oneshot_control(enum tick_broadcast_state state) +{ + return 0; +} +#endif static inline void tick_broadcast_enable(void) { -- cgit From 4f273959b850569253299987eee611927f048de7 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Wed, 8 Jul 2015 00:22:03 +0300 Subject: mei: nfc: fix deadlock on shutdown/suspend path In function mei_nfc_host_exit mei_cl_remove_device cannot be called under the device mutex as device removing flow invokes the device driver remove handler that calls in turn to mei_cl_disable_device which naturally acquires the device mutex. Also remove mei_cl_bus_remove_devices which has the same issue, but is never executed as currently the only device on the mei client bus is NFC and a new device cannot be easily added till the bus revamp is completed. This fixes regression caused by commit be9b720a0ccb ("mei_phy: move all nfc logic from mei driver to nfc") Prior to this change the nfc driver remove handler called to no-op disable function while actual nfc device was disabled directly from the mei driver. Reported-by: Linus Torvalds Acked-by: Greg Kroah-Hartman Cc: Samuel Ortiz Signed-off-by: Tomas Winkler Signed-off-by: Linus Torvalds --- drivers/misc/mei/bus.c | 16 ---------------- drivers/misc/mei/init.c | 2 -- drivers/misc/mei/nfc.c | 3 ++- 3 files changed, 2 insertions(+), 19 deletions(-) diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index 357b6ae4d207..458aa5a09c52 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -552,22 +552,6 @@ void mei_cl_bus_rx_event(struct mei_cl *cl) schedule_work(&device->event_work); } -void mei_cl_bus_remove_devices(struct mei_device *dev) -{ - struct mei_cl *cl, *next; - - mutex_lock(&dev->device_lock); - list_for_each_entry_safe(cl, next, &dev->device_list, device_link) { - if (cl->device) - mei_cl_remove_device(cl->device); - - list_del(&cl->device_link); - mei_cl_unlink(cl); - kfree(cl); - } - mutex_unlock(&dev->device_lock); -} - int __init mei_cl_bus_init(void) { return bus_register(&mei_cl_bus_type); diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c index 94514b2c7a50..00c3865ca3b1 100644 --- a/drivers/misc/mei/init.c +++ b/drivers/misc/mei/init.c @@ -333,8 +333,6 @@ void mei_stop(struct mei_device *dev) mei_nfc_host_exit(dev); - mei_cl_bus_remove_devices(dev); - mutex_lock(&dev->device_lock); mei_wd_stop(dev); diff --git a/drivers/misc/mei/nfc.c b/drivers/misc/mei/nfc.c index b983c4ecad38..290ef3037437 100644 --- a/drivers/misc/mei/nfc.c +++ b/drivers/misc/mei/nfc.c @@ -402,11 +402,12 @@ void mei_nfc_host_exit(struct mei_device *dev) cldev->priv_data = NULL; - mutex_lock(&dev->device_lock); /* Need to remove the device here * since mei_nfc_free will unlink the clients */ mei_cl_remove_device(cldev); + + mutex_lock(&dev->device_lock); mei_nfc_free(ndev); mutex_unlock(&dev->device_lock); } -- cgit From 56551da9255f20ffd3a9711728a1a3ad4b7100af Mon Sep 17 00:00:00 2001 From: Pankaj Dev Date: Tue, 7 Jul 2015 09:40:50 +0200 Subject: drivers: clk: st: Incorrect register offset used for lock_status Incorrect register offset used for sthi407 clockgenC Signed-off-by: Pankaj Dev Signed-off-by: Gabriel Fernandez Fixes: 51306d56ba81 ("clk: st: STiH407: Support for clockgenC0") Signed-off-by: Stephen Boyd --- drivers/clk/st/clkgen-fsyn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/clk/st/clkgen-fsyn.c b/drivers/clk/st/clkgen-fsyn.c index 99c98c15d4a4..d9eb2e1d8471 100644 --- a/drivers/clk/st/clkgen-fsyn.c +++ b/drivers/clk/st/clkgen-fsyn.c @@ -340,7 +340,7 @@ static const struct clkgen_quadfs_data st_fs660c32_C_407 = { CLKGEN_FIELD(0x30c, 0xf, 20), CLKGEN_FIELD(0x310, 0xf, 20) }, .lockstatus_present = true, - .lock_status = CLKGEN_FIELD(0x2A0, 0x1, 24), + .lock_status = CLKGEN_FIELD(0x2f0, 0x1, 24), .powerup_polarity = 1, .standby_polarity = 1, .pll_ops = &st_quadfs_pll_c32_ops, -- cgit From 7928eb0370d1133d0d8cd2f5ddfca19c309079d5 Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Wed, 8 Jul 2015 04:49:10 +0200 Subject: MIPS: O32: Do not handle require 32 bytes from the stack to be readable. Commit 46e12c07b3b9603c60fc1d421ff18618241cb081 (MIPS: O32 / 32-bit: Always copy 4 stack arguments.) change the O32 syscall handler to always load four arguments from the userspace stack even for syscalls that require fewer or no arguments to be copied. This removes a large table from kernel space and need to maintain it. It appeared that it was ok the implementation chosen requires 16 bytes of readable stack space above the user stack pointer. Turned out a few threading implementations munmap the user stack before the thread exits resulting in errors due to the unreadable stack. We now treat any failed load as a if the loaded value was zero and let the actual syscall deal with the situation. Signed-off-by: Ralf Baechle --- arch/mips/kernel/scall32-o32.S | 37 +++++++++++++++++++++++++++---------- arch/mips/kernel/scall64-o32.S | 33 +++++++++++++++++++++++++-------- 2 files changed, 52 insertions(+), 18 deletions(-) diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 6e8de80bb446..4cc13508d967 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -73,10 +73,11 @@ NESTED(handle_sys, PT_SIZE, sp) .set noreorder .set nomacro -1: user_lw(t5, 16(t0)) # argument #5 from usp -4: user_lw(t6, 20(t0)) # argument #6 from usp -3: user_lw(t7, 24(t0)) # argument #7 from usp -2: user_lw(t8, 28(t0)) # argument #8 from usp +load_a4: user_lw(t5, 16(t0)) # argument #5 from usp +load_a5: user_lw(t6, 20(t0)) # argument #6 from usp +load_a6: user_lw(t7, 24(t0)) # argument #7 from usp +load_a7: user_lw(t8, 28(t0)) # argument #8 from usp +loads_done: sw t5, 16(sp) # argument #5 to ksp sw t6, 20(sp) # argument #6 to ksp @@ -85,10 +86,10 @@ NESTED(handle_sys, PT_SIZE, sp) .set pop .section __ex_table,"a" - PTR 1b,bad_stack - PTR 2b,bad_stack - PTR 3b,bad_stack - PTR 4b,bad_stack + PTR load_a4, bad_stack_a4 + PTR load_a5, bad_stack_a5 + PTR load_a6, bad_stack_a6 + PTR load_a7, bad_stack_a7 .previous lw t0, TI_FLAGS($28) # syscall tracing enabled? @@ -153,8 +154,8 @@ syscall_trace_entry: /* ------------------------------------------------------------------------ */ /* - * The stackpointer for a call with more than 4 arguments is bad. - * We probably should handle this case a bit more drastic. + * Our open-coded access area sanity test for the stack pointer + * failed. We probably should handle this case a bit more drastic. */ bad_stack: li v0, EFAULT @@ -163,6 +164,22 @@ bad_stack: sw t0, PT_R7(sp) j o32_syscall_exit +bad_stack_a4: + li t5, 0 + b load_a5 + +bad_stack_a5: + li t6, 0 + b load_a6 + +bad_stack_a6: + li t7, 0 + b load_a7 + +bad_stack_a7: + li t8, 0 + b loads_done + /* * The system call does not exist in this kernel */ diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index d07b210fbeff..66d618bb2fa2 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -69,16 +69,17 @@ NESTED(handle_sys, PT_SIZE, sp) daddu t1, t0, 32 bltz t1, bad_stack -1: lw a4, 16(t0) # argument #5 from usp -2: lw a5, 20(t0) # argument #6 from usp -3: lw a6, 24(t0) # argument #7 from usp -4: lw a7, 28(t0) # argument #8 from usp (for indirect syscalls) +load_a4: lw a4, 16(t0) # argument #5 from usp +load_a5: lw a5, 20(t0) # argument #6 from usp +load_a6: lw a6, 24(t0) # argument #7 from usp +load_a7: lw a7, 28(t0) # argument #8 from usp +loads_done: .section __ex_table,"a" - PTR 1b, bad_stack - PTR 2b, bad_stack - PTR 3b, bad_stack - PTR 4b, bad_stack + PTR load_a4, bad_stack_a4 + PTR load_a5, bad_stack_a5 + PTR load_a6, bad_stack_a6 + PTR load_a7, bad_stack_a7 .previous li t1, _TIF_WORK_SYSCALL_ENTRY @@ -167,6 +168,22 @@ bad_stack: sd t0, PT_R7(sp) j o32_syscall_exit +bad_stack_a4: + li a4, 0 + b load_a5 + +bad_stack_a5: + li a5, 0 + b load_a6 + +bad_stack_a6: + li a6, 0 + b load_a7 + +bad_stack_a7: + li a7, 0 + b loads_done + not_o32_scall: /* * This is not an o32 compatibility syscall, pass it on -- cgit From 5caaf5346892d1e7f0b8b7223062644f8538483f Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Tue, 7 Jul 2015 15:45:46 +1000 Subject: cxl: Fail mmap if requested mapping is larger than assigned problem state area This patch makes the mmap call fail outright if the requested region is larger than the problem state area assigned to the context so the error is reported immediately rather than waiting for an attempt to access an address out of bounds. Although we never expect users to map more than the assigned problem state area and are not aware of anyone doing this (other than for testing), this does have the potential to break users if someone has used a larger range regardless. I'm submitting it for consideration, but if this change is not considered acceptable the previous patch is sufficient to prevent access out of bounds without breaking anyone. Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/context.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index 2a4c80ac322a..6236d4a1f7ad 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -145,8 +145,16 @@ static const struct vm_operations_struct cxl_mmap_vmops = { */ int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma) { + u64 start = vma->vm_pgoff << PAGE_SHIFT; u64 len = vma->vm_end - vma->vm_start; - len = min(len, ctx->psn_size); + + if (ctx->afu->current_mode == CXL_MODE_DEDICATED) { + if (start + len > ctx->afu->adapter->ps_size) + return -EINVAL; + } else { + if (start + len > ctx->psn_size) + return -EINVAL; + } if (ctx->afu->current_mode != CXL_MODE_DEDICATED) { /* make sure there is a valid per process space for this AFU */ -- cgit From 10a5894f2dedd8a26b3132497445b314c0d952c4 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Tue, 7 Jul 2015 15:45:45 +1000 Subject: cxl: Fix off by one error allowing subsequent mmap page to be accessed It was discovered that if a process mmaped their problem state area they were able to access one page more than expected, potentially allowing them to access the problem state area of an unrelated process. This was due to a simple off by one error in the mmap fault handler introduced in 0712dc7e73e59d79bcead5d5520acf4e9e917e87 ("cxl: Fix issues when unmapping contexts"), which is fixed in this patch. Cc: stable@vger.kernel.org Fixes: 0712dc7e73e5 ("cxl: Fix issues when unmapping contexts") Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/context.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index 6236d4a1f7ad..1287148629c0 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -113,11 +113,11 @@ static int cxl_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (ctx->afu->current_mode == CXL_MODE_DEDICATED) { area = ctx->afu->psn_phys; - if (offset > ctx->afu->adapter->ps_size) + if (offset >= ctx->afu->adapter->ps_size) return VM_FAULT_SIGBUS; } else { area = ctx->psn_phys; - if (offset > ctx->psn_size) + if (offset >= ctx->psn_size) return VM_FAULT_SIGBUS; } -- cgit From 442053e57a4fc58b719b6ceab60f29ef9cf4404c Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Tue, 7 Jul 2015 12:21:10 -0400 Subject: powerpc/perf/24x7: Fix lockdep warning The sysfs attributes for the 24x7 counters are dynamically allocated. Initialize the attributes using sysfs_attr_init() to fix following warning which occurs when CONFIG_DEBUG_LOCK_VMALLOC=y. [ 0.346249] audit: initializing netlink subsys (disabled) [ 0.346284] audit: type=2000 audit(1436295254.340:1): initialized [ 0.346489] BUG: key c0000000efe90198 not in .data! [ 0.346491] DEBUG_LOCKS_WARN_ON(1) [ 0.346502] ------------[ cut here ]------------ [ 0.346504] WARNING: at ../kernel/locking/lockdep.c:3002 [ 0.346506] Modules linked in: Reported-by: Gustavo Luiz Duarte Signed-off-by: Sukadev Bhattiprolu Tested-by: Gustavo Luiz Duarte Signed-off-by: Michael Ellerman --- arch/powerpc/perf/hv-24x7.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c index ec2eb20631d1..df956295c2a7 100644 --- a/arch/powerpc/perf/hv-24x7.c +++ b/arch/powerpc/perf/hv-24x7.c @@ -320,6 +320,8 @@ static struct attribute *device_str_attr_create_(char *name, char *str) if (!attr) return NULL; + sysfs_attr_init(&attr->attr.attr); + attr->var = str; attr->attr.attr.name = name; attr->attr.attr.mode = 0444; -- cgit From 9958084a5275ca2e8f55c5b18729307f2f0cb53b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 7 Jul 2015 19:16:23 +1000 Subject: powerpc: Update MAINTAINERS to point at shared tree Now that we have a shared powerpc tree on kernel.org, point folks at that as the primary place to look for powerpc stuff. Signed-off-by: Michael Ellerman --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8133cefb6b6e..b29f8ebc10b0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6161,7 +6161,7 @@ M: Michael Ellerman W: http://www.penguinppc.org/ L: linuxppc-dev@lists.ozlabs.org Q: http://patchwork.ozlabs.org/project/linuxppc-dev/list/ -T: git git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git +T: git git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git S: Supported F: Documentation/powerpc/ F: arch/powerpc/ -- cgit From 69711ca19b06d1b33d8f21213b540b5d1c638dbf Mon Sep 17 00:00:00 2001 From: Sébastien Hinderer Date: Wed, 8 Jul 2015 00:02:01 +0200 Subject: x86/kconfig: Fix typo in the CONFIG_CMDLINE_BOOL help text MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Sébastien Hinderer Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Samuel Thibault Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 0b2929f49531..3dbb7e7909ca 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2020,7 +2020,7 @@ config CMDLINE_BOOL To compile command line arguments into the kernel, set this option to 'Y', then fill in the - the boot arguments in CONFIG_CMDLINE. + boot arguments in CONFIG_CMDLINE. Systems with fully functional boot loaders (i.e. non-embedded) should leave this option set to 'N'. -- cgit From 2c3d99845eb7b1e9f3d2863099bc5a25c8461d3f Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 6 Jul 2015 15:15:01 +0100 Subject: drm/i915: Restore all GGTT VMAs on resume When rotated and partial views were added no one spotted the resume path which assumes only one GGTT VMA per object and hence is now skipping rebind of alternative views. Signed-off-by: Tvrtko Ursulin Cc: Daniel Vetter Cc: Chris Wilson Cc: Joonas Lahtinen Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_gtt.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index 9daa2883ac18..dcc6a88c560e 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2546,6 +2546,8 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) struct drm_i915_private *dev_priv = dev->dev_private; struct drm_i915_gem_object *obj; struct i915_address_space *vm; + struct i915_vma *vma; + bool flush; i915_check_and_clear_faults(dev); @@ -2555,16 +2557,23 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev) dev_priv->gtt.base.total, true); + /* Cache flush objects bound into GGTT and rebind them. */ + vm = &dev_priv->gtt.base; list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) { - struct i915_vma *vma = i915_gem_obj_to_vma(obj, - &dev_priv->gtt.base); - if (!vma) - continue; + flush = false; + list_for_each_entry(vma, &obj->vma_list, vma_link) { + if (vma->vm != vm) + continue; - i915_gem_clflush_object(obj, obj->pin_display); - WARN_ON(i915_vma_bind(vma, obj->cache_level, PIN_UPDATE)); - } + WARN_ON(i915_vma_bind(vma, obj->cache_level, + PIN_UPDATE)); + flush = true; + } + + if (flush) + i915_gem_clflush_object(obj, obj->pin_display); + } if (INTEL_INFO(dev)->gen >= 8) { if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev)) -- cgit From a899418167264c7bac574b1a0f1b2c26c5b0995a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 5 Jul 2015 17:12:30 +0000 Subject: hotplug: Prevent alloc/free of irq descriptors during cpu up/down When a cpu goes up some architectures (e.g. x86) have to walk the irq space to set up the vector space for the cpu. While this needs extra protection at the architecture level we can avoid a few race conditions by preventing the concurrent allocation/free of irq descriptors and the associated data. When a cpu goes down it moves the interrupts which are targeted to this cpu away by reassigning the affinities. While this happens interrupts can be allocated and freed, which opens a can of race conditions in the code which reassignes the affinities because interrupt descriptors might be freed underneath. Example: CPU1 CPU2 cpu_up/down irq_desc = irq_to_desc(irq); remove_from_radix_tree(desc); raw_spin_lock(&desc->lock); free(desc); We could protect the irq descriptors with RCU, but that would require a full tree change of all accesses to interrupt descriptors. But fortunately these kind of race conditions are rather limited to a few things like cpu hotplug. The normal setup/teardown is very well serialized. So the simpler and obvious solution is: Prevent allocation and freeing of interrupt descriptors accross cpu hotplug. Signed-off-by: Thomas Gleixner Acked-by: Peter Zijlstra Cc: xiao jin Cc: Joerg Roedel Cc: Borislav Petkov Cc: Yanmin Zhang Link: http://lkml.kernel.org/r/20150705171102.063519515@linutronix.de --- include/linux/irqdesc.h | 7 ++++++- kernel/cpu.c | 22 +++++++++++++++++++++- kernel/irq/internals.h | 4 ---- 3 files changed, 27 insertions(+), 6 deletions(-) diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 624a668e61f1..fcea4e48e21f 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -87,7 +87,12 @@ struct irq_desc { const char *name; } ____cacheline_internodealigned_in_smp; -#ifndef CONFIG_SPARSE_IRQ +#ifdef CONFIG_SPARSE_IRQ +extern void irq_lock_sparse(void); +extern void irq_unlock_sparse(void); +#else +static inline void irq_lock_sparse(void) { } +static inline void irq_unlock_sparse(void) { } extern struct irq_desc irq_desc[NR_IRQS]; #endif diff --git a/kernel/cpu.c b/kernel/cpu.c index 9c9c9fab16cc..6a374544d495 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include "smpboot.h" @@ -392,13 +393,19 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) smpboot_park_threads(cpu); /* - * So now all preempt/rcu users must observe !cpu_active(). + * Prevent irq alloc/free while the dying cpu reorganizes the + * interrupt affinities. */ + irq_lock_sparse(); + /* + * So now all preempt/rcu users must observe !cpu_active(). + */ err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); if (err) { /* CPU didn't die: tell everyone. Can't complain. */ cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu); + irq_unlock_sparse(); goto out_release; } BUG_ON(cpu_online(cpu)); @@ -415,6 +422,9 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) smp_mb(); /* Read from cpu_dead_idle before __cpu_die(). */ per_cpu(cpu_dead_idle, cpu) = false; + /* Interrupts are moved away from the dying cpu, reenable alloc/free */ + irq_unlock_sparse(); + hotplug_cpu__broadcast_tick_pull(cpu); /* This actually kills the CPU. */ __cpu_die(cpu); @@ -517,8 +527,18 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen) goto out_notify; } + /* + * Some architectures have to walk the irq descriptors to + * setup the vector space for the cpu which comes online. + * Prevent irq alloc/free across the bringup. + */ + irq_lock_sparse(); + /* Arch-specific enabling code. */ ret = __cpu_up(cpu, idle); + + irq_unlock_sparse(); + if (ret != 0) goto out_notify; BUG_ON(!cpu_online(cpu)); diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 4834ee828c41..61008b8433ab 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -76,12 +76,8 @@ extern void unmask_threaded_irq(struct irq_desc *desc); #ifdef CONFIG_SPARSE_IRQ static inline void irq_mark_irq(unsigned int irq) { } -extern void irq_lock_sparse(void); -extern void irq_unlock_sparse(void); #else extern void irq_mark_irq(unsigned int irq); -static inline void irq_lock_sparse(void) { } -static inline void irq_unlock_sparse(void) { } #endif extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); -- cgit From 63fef06ada94172d7b4295fc205441bafd8f8fb3 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 7 Jul 2015 11:15:46 +0200 Subject: drm/i915: Check crtc->active in intel_crtc_disable_planes This was lost in commit ce22dba92de22e951dee2ff89937a39754d2dd91 Author: Maarten Lankhorst Date: Tue Apr 21 17:12:56 2015 +0300 drm/i915: Move toggling planes out of crtc enable/disable. and we still need that crtc->active check since the overall modeset flow doesn't yet take dpms state into account properly. Fixes WARNING backtraces on at least bdw/hsw due to the ips disabling code being upset about being run on a switched-off pipe. We don't need a corresponding change on the enable side since with the old setCrtc semantics we always force-enable the pipe after a modeset. And the dpms function intel_crtc_control already checks for ->active. Reported-by: Linus Torvalds Cc: Linus Torvalds Cc: Maarten Lankhorst Cc: Ander Conselvan de Oliveira Signed-off-by: Daniel Vetter Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 8aa803848f35..647b1404c441 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -4854,6 +4854,9 @@ static void intel_crtc_disable_planes(struct drm_crtc *crtc) struct intel_plane *intel_plane; int pipe = intel_crtc->pipe; + if (!intel_crtc->active) + return; + intel_crtc_wait_for_pending_flips(crtc); intel_pre_disable_primary(crtc); -- cgit From dec4f799d0a4c9edae20512fa60b0a36f3299ca2 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 7 Jul 2015 11:15:47 +0200 Subject: drm/i915: Use crtc_state->active in primary check_plane func Since commit 8c7b5ccb729870e606321b3703e2c2e698c49a95 Author: Ander Conselvan de Oliveira Date: Tue Apr 21 17:13:19 2015 +0300 drm/i915: Use atomic helpers for computing changed flags we compute the plane state for a modeset before actually committing any changes, which means crtc->active won't be correct yet. Looking at future work in the modeset conversion targetting 4.3 the only places where crtc_state->active isn't accurate is when disabling other CRTCs than the one the modeset is for (when stealing connectors). Which isn't the case here. And that's also confirmed by an audit, we do unconditionally update crtc_state->active for the current pipe. We also don't need to update any other plane check functions since we only ever add the primary state to the modeset update right now. Cc: Ander Conselvan de Oliveira Cc: Maarten Lankhorst Cc: Jani Nikula Signed-off-by: Daniel Vetter Reviewed-by: Maarten Lankhorst Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 647b1404c441..ba9321998a41 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13276,7 +13276,7 @@ intel_check_primary_plane(struct drm_plane *plane, if (ret) return ret; - if (intel_crtc->active) { + if (crtc_state->base.active) { struct intel_plane_state *old_state = to_intel_plane_state(plane->state); -- cgit From 45820c294fe1b1a9df495d57f40585ef2d069a39 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 8 Jul 2015 09:33:38 -0700 Subject: Fix broken audit tests for exec arg len The "fix" in commit 0b08c5e5944 ("audit: Fix check of return value of strnlen_user()") didn't fix anything, it broke things. As reported by Steven Rostedt: "Yes, strnlen_user() returns 0 on fault, but if you look at what len is set to, than you would notice that on fault len would be -1" because we just subtracted one from the return value. So testing against 0 doesn't test for a fault condition, it tests against a perfectly valid empty string. Also fix up the usual braindamage wrt using WARN_ON() inside a conditional - make it part of the conditional and remove the explicit unlikely() (which is already part of the WARN_ON*() logic, exactly so that you don't have to write unreadable code. Reported-and-tested-by: Steven Rostedt Cc: Jan Kara Cc: Paul Moore Signed-off-by: Linus Torvalds --- kernel/auditsc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 09c65640cad6..e85bdfd15fed 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1021,8 +1021,7 @@ static int audit_log_single_execve_arg(struct audit_context *context, * for strings that are too long, we should not have created * any. */ - if (unlikely((len == 0) || len > MAX_ARG_STRLEN - 1)) { - WARN_ON(1); + if (WARN_ON_ONCE(len < 0 || len > MAX_ARG_STRLEN - 1)) { send_sig(SIGKILL, current, 0); return -1; } -- cgit From 07f18f0bb8d8d65badd8b4988b40d329fc0cc6dc Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Fri, 3 Jul 2015 06:03:06 +0200 Subject: drm/radeon: Handle irqs only based on irq ring, not irq status regs. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Trying to resolve issues with missed vblanks and impossible values inside delivered kms pageflip completion events showed that radeon's irq handling sometimes doesn't handle valid irqs, but silently skips them. This was observed for vblank interrupts. Although those irqs have corresponding events queued in the gpu's irq ring at time of interrupt, and therefore the corresponding handling code gets triggered by these events, the handling code sometimes silently skipped processing the irq. The reason for those skips is that the handling code double-checks for each irq event if the corresponding irq status bits in the irq status registers are set. Sometimes those bits are not set at time of check for valid irqs, maybe due to some hardware race on some setups? The problem only seems to happen on some machine + card combos sometimes, e.g., never happened during my testing of different PC cards of the DCE-2/3/4 generation a year ago, but happens consistently now on two different Apple Mac cards (RV730, DCE-3, Apple iMac and Evergreen JUNIPER, DCE-4 in a Apple MacPro). It also doesn't happen at each interrupt but only occassionally every couple of hundred or thousand vblank interrupts. This results in XOrg warning messages like "[ 7084.472] (WW) RADEON(0): radeon_dri2_flip_event_handler: Pageflip completion event has impossible msc 420120 < target_msc 420121" as well as skipped frames and problems for applications that use kms pageflip events or vblank events, e.g., users of DRI2 and DRI3/Present, Waylands Weston compositor, etc. See also https://bugs.freedesktop.org/show_bug.cgi?id=85203 After some talking to Alex and Michel, we decided to fix this by turning the double-check for asserted irq status bits into a warning. Whenever a irq event is queued in the IH ring, always execute the corresponding interrupt handler. Still check the irq status bits, but only to log a DRM_DEBUG message on a mismatch. This fixed the problems reliably on both previously failing cards, RV-730 dual-head tested on both crtcs (pipes D1 and D2) and a triple-output Juniper HD-5770 card tested on all three available crtcs (D1/D2/D3). The r600 and evergreen irq handling is therefore tested, but the cik an si handling is only compile tested due to lack of hw. Reviewed-by: Christian König Signed-off-by: Mario Kleiner CC: Michel Dänzer CC: Alex Deucher CC: # v3.16+ Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/cik.c | 336 +++++++++++++++++-------------- drivers/gpu/drm/radeon/evergreen.c | 392 ++++++++++++++++++++----------------- drivers/gpu/drm/radeon/r600.c | 155 ++++++++------- drivers/gpu/drm/radeon/si.c | 336 +++++++++++++++++-------------- 4 files changed, 688 insertions(+), 531 deletions(-) diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 4ecf5caa8c6d..248953d2fdb7 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -7964,23 +7964,27 @@ restart_ih: case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -7990,23 +7994,27 @@ restart_ih: case 2: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) + radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D2 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8016,23 +8024,27 @@ restart_ih: case 3: /* D3 vblank/vline */ switch (src_data) { case 0: /* D3 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[2]) { - drm_handle_vblank(rdev->ddev, 2); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[2])) - radeon_crtc_handle_vblank(rdev, 2); - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D3 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[2]) { + drm_handle_vblank(rdev->ddev, 2); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[2])) + radeon_crtc_handle_vblank(rdev, 2); + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D3 vblank\n"); + break; case 1: /* D3 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; - DRM_DEBUG("IH: D3 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; + DRM_DEBUG("IH: D3 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8042,23 +8054,27 @@ restart_ih: case 4: /* D4 vblank/vline */ switch (src_data) { case 0: /* D4 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[3]) { - drm_handle_vblank(rdev->ddev, 3); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[3])) - radeon_crtc_handle_vblank(rdev, 3); - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D4 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[3]) { + drm_handle_vblank(rdev->ddev, 3); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[3])) + radeon_crtc_handle_vblank(rdev, 3); + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D4 vblank\n"); + break; case 1: /* D4 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; - DRM_DEBUG("IH: D4 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; + DRM_DEBUG("IH: D4 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8068,23 +8084,27 @@ restart_ih: case 5: /* D5 vblank/vline */ switch (src_data) { case 0: /* D5 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[4]) { - drm_handle_vblank(rdev->ddev, 4); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[4])) - radeon_crtc_handle_vblank(rdev, 4); - rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D5 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[4]) { + drm_handle_vblank(rdev->ddev, 4); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[4])) + radeon_crtc_handle_vblank(rdev, 4); + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D5 vblank\n"); + break; case 1: /* D5 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; - DRM_DEBUG("IH: D5 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; + DRM_DEBUG("IH: D5 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8094,23 +8114,27 @@ restart_ih: case 6: /* D6 vblank/vline */ switch (src_data) { case 0: /* D6 vblank */ - if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[5]) { - drm_handle_vblank(rdev->ddev, 5); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[5])) - radeon_crtc_handle_vblank(rdev, 5); - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D6 vblank\n"); + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[5]) { + drm_handle_vblank(rdev->ddev, 5); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[5])) + radeon_crtc_handle_vblank(rdev, 5); + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D6 vblank\n"); + break; case 1: /* D6 vline */ - if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; - DRM_DEBUG("IH: D6 vline\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; + DRM_DEBUG("IH: D6 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -8130,88 +8154,112 @@ restart_ih: case 42: /* HPD hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); + break; case 1: - if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); + break; case 2: - if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); + break; case 3: - if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); + break; case 4: - if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); + break; case 5: - if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); + break; case 6: - if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 1\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 1\n"); + break; case 7: - if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 2\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 2\n"); + break; case 8: - if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 3\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 3\n"); + break; case 9: - if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 4\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 4\n"); + break; case 10: - if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 5\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 5\n"); + break; case 11: - if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 6\n"); - } + if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 6\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c index 3a6d483a2c36..0acde1949c18 100644 --- a/drivers/gpu/drm/radeon/evergreen.c +++ b/drivers/gpu/drm/radeon/evergreen.c @@ -4924,7 +4924,7 @@ restart_ih: return IRQ_NONE; rptr = rdev->ih.rptr; - DRM_DEBUG("r600_irq_process start: rptr %d, wptr %d\n", rptr, wptr); + DRM_DEBUG("evergreen_irq_process start: rptr %d, wptr %d\n", rptr, wptr); /* Order reading of wptr vs. reading of IH ring data */ rmb(); @@ -4942,23 +4942,27 @@ restart_ih: case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D1 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D1 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4968,23 +4972,27 @@ restart_ih: case 2: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D2 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) + radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D2 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D2 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4994,23 +5002,27 @@ restart_ih: case 3: /* D3 vblank/vline */ switch (src_data) { case 0: /* D3 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[2]) { - drm_handle_vblank(rdev->ddev, 2); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[2])) - radeon_crtc_handle_vblank(rdev, 2); - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D3 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D3 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[2]) { + drm_handle_vblank(rdev->ddev, 2); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[2])) + radeon_crtc_handle_vblank(rdev, 2); + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D3 vblank\n"); + break; case 1: /* D3 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; - DRM_DEBUG("IH: D3 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D3 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; + DRM_DEBUG("IH: D3 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5020,23 +5032,27 @@ restart_ih: case 4: /* D4 vblank/vline */ switch (src_data) { case 0: /* D4 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[3]) { - drm_handle_vblank(rdev->ddev, 3); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[3])) - radeon_crtc_handle_vblank(rdev, 3); - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D4 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D4 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[3]) { + drm_handle_vblank(rdev->ddev, 3); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[3])) + radeon_crtc_handle_vblank(rdev, 3); + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D4 vblank\n"); + break; case 1: /* D4 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; - DRM_DEBUG("IH: D4 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D4 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; + DRM_DEBUG("IH: D4 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5046,23 +5062,27 @@ restart_ih: case 5: /* D5 vblank/vline */ switch (src_data) { case 0: /* D5 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[4]) { - drm_handle_vblank(rdev->ddev, 4); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[4])) - radeon_crtc_handle_vblank(rdev, 4); - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D5 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D5 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[4]) { + drm_handle_vblank(rdev->ddev, 4); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[4])) + radeon_crtc_handle_vblank(rdev, 4); + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D5 vblank\n"); + break; case 1: /* D5 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; - DRM_DEBUG("IH: D5 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D5 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; + DRM_DEBUG("IH: D5 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5072,23 +5092,27 @@ restart_ih: case 6: /* D6 vblank/vline */ switch (src_data) { case 0: /* D6 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[5]) { - drm_handle_vblank(rdev->ddev, 5); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[5])) - radeon_crtc_handle_vblank(rdev, 5); - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D6 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D6 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[5]) { + drm_handle_vblank(rdev->ddev, 5); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[5])) + radeon_crtc_handle_vblank(rdev, 5); + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D6 vblank\n"); + break; case 1: /* D6 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; - DRM_DEBUG("IH: D6 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D6 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; + DRM_DEBUG("IH: D6 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5108,88 +5132,100 @@ restart_ih: case 42: /* HPD hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); break; case 1: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); break; case 2: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); break; case 3: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); break; case 4: - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); break; case 5: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); break; case 6: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 1\n"); break; case 7: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 2\n"); break; case 8: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 3\n"); break; case 9: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 4\n"); break; case 10: - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 5\n"); break; case 11: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 6\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 6\n"); break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -5199,46 +5235,52 @@ restart_ih: case 44: /* hdmi */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.evergreen.afmt_status1 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status1 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI0\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status1 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status1 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI0\n"); break; case 1: - if (rdev->irq.stat_regs.evergreen.afmt_status2 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status2 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status2 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status2 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI1\n"); break; case 2: - if (rdev->irq.stat_regs.evergreen.afmt_status3 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status3 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status3 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status3 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI2\n"); break; case 3: - if (rdev->irq.stat_regs.evergreen.afmt_status4 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status4 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status4 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status4 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI3\n"); break; case 4: - if (rdev->irq.stat_regs.evergreen.afmt_status5 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status5 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status5 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status5 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI4\n"); break; case 5: - if (rdev->irq.stat_regs.evergreen.afmt_status6 & AFMT_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.evergreen.afmt_status6 &= ~AFMT_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.afmt_status6 & AFMT_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.afmt_status6 &= ~AFMT_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI5\n"); break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c index 35dafd77a639..4ea5b10ff5f4 100644 --- a/drivers/gpu/drm/radeon/r600.c +++ b/drivers/gpu/drm/radeon/r600.c @@ -4086,23 +4086,27 @@ restart_ih: case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D1 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D1 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4112,23 +4116,27 @@ restart_ih: case 5: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: D2 vblank - IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) + radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.r600.disp_int & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: D2 vline - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4148,46 +4156,53 @@ restart_ih: case 19: /* HPD/DAC hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.r600.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: HPD1 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); break; case 1: - if (rdev->irq.stat_regs.r600.disp_int & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: HPD2 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); break; case 4: - if (rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: HPD3 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); break; case 5: - if (rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: HPD4 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); break; case 10: - if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: HPD5 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); break; case 12: - if (rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if (!(rdev->irq.stat_regs.r600.disp_int_cont2 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: HPD6 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.disp_int_cont2 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -4197,18 +4212,22 @@ restart_ih: case 21: /* hdmi */ switch (src_data) { case 4: - if (rdev->irq.stat_regs.r600.hdmi0_status & HDMI0_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.r600.hdmi0_status &= ~HDMI0_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI0\n"); - } + if (!(rdev->irq.stat_regs.r600.hdmi0_status & HDMI0_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: HDMI0 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.hdmi0_status &= ~HDMI0_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI0\n"); + break; case 5: - if (rdev->irq.stat_regs.r600.hdmi1_status & HDMI0_AZ_FORMAT_WTRIG) { - rdev->irq.stat_regs.r600.hdmi1_status &= ~HDMI0_AZ_FORMAT_WTRIG; - queue_hdmi = true; - DRM_DEBUG("IH: HDMI1\n"); - } + if (!(rdev->irq.stat_regs.r600.hdmi1_status & HDMI0_AZ_FORMAT_WTRIG)) + DRM_DEBUG("IH: HDMI1 - IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.r600.hdmi1_status &= ~HDMI0_AZ_FORMAT_WTRIG; + queue_hdmi = true; + DRM_DEBUG("IH: HDMI1\n"); + break; default: DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data); diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c index 26388b5dd6ed..07037e32dea3 100644 --- a/drivers/gpu/drm/radeon/si.c +++ b/drivers/gpu/drm/radeon/si.c @@ -6466,23 +6466,27 @@ restart_ih: case 1: /* D1 vblank/vline */ switch (src_data) { case 0: /* D1 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[0]) { - drm_handle_vblank(rdev->ddev, 0); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[0])) - radeon_crtc_handle_vblank(rdev, 0); - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D1 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[0]) { + drm_handle_vblank(rdev->ddev, 0); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[0])) + radeon_crtc_handle_vblank(rdev, 0); + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D1 vblank\n"); + break; case 1: /* D1 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; - DRM_DEBUG("IH: D1 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT; + DRM_DEBUG("IH: D1 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6492,23 +6496,27 @@ restart_ih: case 2: /* D2 vblank/vline */ switch (src_data) { case 0: /* D2 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[1]) { - drm_handle_vblank(rdev->ddev, 1); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[1])) - radeon_crtc_handle_vblank(rdev, 1); - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D2 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[1]) { + drm_handle_vblank(rdev->ddev, 1); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[1])) + radeon_crtc_handle_vblank(rdev, 1); + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D2 vblank\n"); + break; case 1: /* D2 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; - DRM_DEBUG("IH: D2 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT; + DRM_DEBUG("IH: D2 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6518,23 +6526,27 @@ restart_ih: case 3: /* D3 vblank/vline */ switch (src_data) { case 0: /* D3 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[2]) { - drm_handle_vblank(rdev->ddev, 2); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[2])) - radeon_crtc_handle_vblank(rdev, 2); - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D3 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[2]) { + drm_handle_vblank(rdev->ddev, 2); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[2])) + radeon_crtc_handle_vblank(rdev, 2); + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D3 vblank\n"); + break; case 1: /* D3 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; - DRM_DEBUG("IH: D3 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT; + DRM_DEBUG("IH: D3 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6544,23 +6556,27 @@ restart_ih: case 4: /* D4 vblank/vline */ switch (src_data) { case 0: /* D4 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[3]) { - drm_handle_vblank(rdev->ddev, 3); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[3])) - radeon_crtc_handle_vblank(rdev, 3); - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D4 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[3]) { + drm_handle_vblank(rdev->ddev, 3); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[3])) + radeon_crtc_handle_vblank(rdev, 3); + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D4 vblank\n"); + break; case 1: /* D4 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; - DRM_DEBUG("IH: D4 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT; + DRM_DEBUG("IH: D4 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6570,23 +6586,27 @@ restart_ih: case 5: /* D5 vblank/vline */ switch (src_data) { case 0: /* D5 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[4]) { - drm_handle_vblank(rdev->ddev, 4); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[4])) - radeon_crtc_handle_vblank(rdev, 4); - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D5 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[4]) { + drm_handle_vblank(rdev->ddev, 4); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[4])) + radeon_crtc_handle_vblank(rdev, 4); + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D5 vblank\n"); + break; case 1: /* D5 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; - DRM_DEBUG("IH: D5 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT; + DRM_DEBUG("IH: D5 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6596,23 +6616,27 @@ restart_ih: case 6: /* D6 vblank/vline */ switch (src_data) { case 0: /* D6 vblank */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) { - if (rdev->irq.crtc_vblank_int[5]) { - drm_handle_vblank(rdev->ddev, 5); - rdev->pm.vblank_sync = true; - wake_up(&rdev->irq.vblank_queue); - } - if (atomic_read(&rdev->irq.pflip[5])) - radeon_crtc_handle_vblank(rdev, 5); - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; - DRM_DEBUG("IH: D6 vblank\n"); + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (rdev->irq.crtc_vblank_int[5]) { + drm_handle_vblank(rdev->ddev, 5); + rdev->pm.vblank_sync = true; + wake_up(&rdev->irq.vblank_queue); } + if (atomic_read(&rdev->irq.pflip[5])) + radeon_crtc_handle_vblank(rdev, 5); + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT; + DRM_DEBUG("IH: D6 vblank\n"); + break; case 1: /* D6 vline */ - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; - DRM_DEBUG("IH: D6 vline\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT; + DRM_DEBUG("IH: D6 vline\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); @@ -6632,88 +6656,112 @@ restart_ih: case 42: /* HPD hotplug */ switch (src_data) { case 0: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD1\n"); + break; case 1: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD2\n"); + break; case 2: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD3\n"); + break; case 3: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD4\n"); + break; case 4: - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD5\n"); + break; case 5: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; - queue_hotplug = true; - DRM_DEBUG("IH: HPD6\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT; + queue_hotplug = true; + DRM_DEBUG("IH: HPD6\n"); + break; case 6: - if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 1\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 1\n"); + break; case 7: - if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 2\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 2\n"); + break; case 8: - if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 3\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 3\n"); + break; case 9: - if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 4\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 4\n"); + break; case 10: - if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 5\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 5\n"); + break; case 11: - if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) { - rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; - queue_dp = true; - DRM_DEBUG("IH: HPD_RX 6\n"); - } + if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT)) + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT; + queue_dp = true; + DRM_DEBUG("IH: HPD_RX 6\n"); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data); -- cgit From bd833144a23dead304744dc748f5d72d7e92d315 Mon Sep 17 00:00:00 2001 From: Mario Kleiner Date: Fri, 3 Jul 2015 06:03:07 +0200 Subject: drm/amdgpu: Handle irqs only based on irq ring, not irq status regs. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a translation of the patch ... "drm/radeon: Handle irqs only based on irq ring, not irq status regs." ... for the vblank irq handling, to fix the same problem described in that patch on the new driver. Only compile tested due to lack of suitable hw. Reviewed-by: Christian König Signed-off-by: Mario Kleiner CC: Michel Dänzer CC: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/dce_v10_0.c | 22 ++++++++++++++-------- drivers/gpu/drm/amd/amdgpu/dce_v11_0.c | 22 ++++++++++++++-------- drivers/gpu/drm/amd/amdgpu/dce_v8_0.c | 22 ++++++++++++++-------- 3 files changed, 42 insertions(+), 24 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 5cde635978f9..6e77964f1b64 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -3403,19 +3403,25 @@ static int dce_v10_0_crtc_irq(struct amdgpu_device *adev, switch (entry->src_data) { case 0: /* vblank */ - if (disp_int & interrupt_status_offsets[crtc].vblank) { + if (disp_int & interrupt_status_offsets[crtc].vblank) dce_v10_0_crtc_vblank_int_ack(adev, crtc); - if (amdgpu_irq_enabled(adev, source, irq_type)) { - drm_handle_vblank(adev->ddev, crtc); - } - DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (amdgpu_irq_enabled(adev, source, irq_type)) { + drm_handle_vblank(adev->ddev, crtc); } + DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + break; case 1: /* vline */ - if (disp_int & interrupt_status_offsets[crtc].vline) { + if (disp_int & interrupt_status_offsets[crtc].vline) dce_v10_0_crtc_vline_int_ack(adev, crtc); - DRM_DEBUG("IH: D%d vline\n", crtc + 1); - } + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + DRM_DEBUG("IH: D%d vline\n", crtc + 1); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 95efd98b202d..7f7abb0e0be5 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -3402,19 +3402,25 @@ static int dce_v11_0_crtc_irq(struct amdgpu_device *adev, switch (entry->src_data) { case 0: /* vblank */ - if (disp_int & interrupt_status_offsets[crtc].vblank) { + if (disp_int & interrupt_status_offsets[crtc].vblank) dce_v11_0_crtc_vblank_int_ack(adev, crtc); - if (amdgpu_irq_enabled(adev, source, irq_type)) { - drm_handle_vblank(adev->ddev, crtc); - } - DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (amdgpu_irq_enabled(adev, source, irq_type)) { + drm_handle_vblank(adev->ddev, crtc); } + DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + break; case 1: /* vline */ - if (disp_int & interrupt_status_offsets[crtc].vline) { + if (disp_int & interrupt_status_offsets[crtc].vline) dce_v11_0_crtc_vline_int_ack(adev, crtc); - DRM_DEBUG("IH: D%d vline\n", crtc + 1); - } + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + DRM_DEBUG("IH: D%d vline\n", crtc + 1); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index aaca8d663f2c..08387dfd98a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -3237,19 +3237,25 @@ static int dce_v8_0_crtc_irq(struct amdgpu_device *adev, switch (entry->src_data) { case 0: /* vblank */ - if (disp_int & interrupt_status_offsets[crtc].vblank) { + if (disp_int & interrupt_status_offsets[crtc].vblank) WREG32(mmLB_VBLANK_STATUS + crtc_offsets[crtc], LB_VBLANK_STATUS__VBLANK_ACK_MASK); - if (amdgpu_irq_enabled(adev, source, irq_type)) { - drm_handle_vblank(adev->ddev, crtc); - } - DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + if (amdgpu_irq_enabled(adev, source, irq_type)) { + drm_handle_vblank(adev->ddev, crtc); } + DRM_DEBUG("IH: D%d vblank\n", crtc + 1); + break; case 1: /* vline */ - if (disp_int & interrupt_status_offsets[crtc].vline) { + if (disp_int & interrupt_status_offsets[crtc].vline) WREG32(mmLB_VLINE_STATUS + crtc_offsets[crtc], LB_VLINE_STATUS__VLINE_ACK_MASK); - DRM_DEBUG("IH: D%d vline\n", crtc + 1); - } + else + DRM_DEBUG("IH: IH event w/o asserted irq bit?\n"); + + DRM_DEBUG("IH: D%d vline\n", crtc + 1); + break; default: DRM_DEBUG("Unhandled interrupt: %d %d\n", entry->src_id, entry->src_data); -- cgit From 828202a382a06eda74202d4888d01a1078c68922 Mon Sep 17 00:00:00 2001 From: Grigori Goronzy Date: Fri, 3 Jul 2015 01:54:10 +0200 Subject: drm/radeon: use RCU query for GEM_BUSY syscall MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't need to call the (expensive) radeon_bo_wait, checking the fences via RCU is much faster. The reservation done by radeon_bo_wait does not save us from any race conditions. Reviewed-by: Christian König Signed-off-by: Grigori Goronzy Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_gem.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index ac3c1310b953..7199e19eb8ba 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -428,7 +428,6 @@ int radeon_gem_mmap_ioctl(struct drm_device *dev, void *data, int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { - struct radeon_device *rdev = dev->dev_private; struct drm_radeon_gem_busy *args = data; struct drm_gem_object *gobj; struct radeon_bo *robj; @@ -440,10 +439,16 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data, return -ENOENT; } robj = gem_to_radeon_bo(gobj); - r = radeon_bo_wait(robj, &cur_placement, true); + + r = reservation_object_test_signaled_rcu(robj->tbo.resv, true); + if (r == 0) + r = -EBUSY; + else + r = 0; + + cur_placement = ACCESS_ONCE(robj->tbo.mem.mem_type); args->domain = radeon_mem_type_to_domain(cur_placement); drm_gem_object_unreference_unlocked(gobj); - r = radeon_gem_handle_lockup(rdev, r); return r; } -- cgit From 54e03986133468e02cb01b76215e4d53a9cf6380 Mon Sep 17 00:00:00 2001 From: Grigori Goronzy Date: Fri, 3 Jul 2015 01:54:11 +0200 Subject: drm/radeon: fix HDP flushing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was regressed by commit 39e7f6f8, although I don't know of any actual issues caused by it. The storage domain is read without TTM locking now, but the lock never helped to prevent any races. Reviewed-by: Christian König Cc: stable@vger.kernel.org Signed-off-by: Grigori Goronzy Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_gem.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index 7199e19eb8ba..013ec7106e55 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -476,6 +476,7 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data, r = ret; /* Flush HDP cache via MMIO if necessary */ + cur_placement = ACCESS_ONCE(robj->tbo.mem.mem_type); if (rdev->asic->mmio_hdp_flush && radeon_mem_type_to_domain(cur_placement) == RADEON_GEM_DOMAIN_VRAM) robj->rdev->asic->mmio_hdp_flush(rdev); -- cgit From 5e3c4f907043a7fae1b48e86536dc7b9efa07e29 Mon Sep 17 00:00:00 2001 From: Grigori Goronzy Date: Fri, 3 Jul 2015 01:54:12 +0200 Subject: drm/radeon: default to 2048 MB GART size on SI+ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Newer ASICs have more VRAM on average and allocating more GART as well can have advantages. Also see commit edcd26e8. Ideally, we should scale GART size based on actual VRAM size, but that requires significant restructuring of initialization. v2: extract small helper, apply to error paths Reviewed-by: Christian König Signed-off-by: Grigori Goronzy Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_device.c | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 2593b1168bd6..1a532a9bc3a3 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1079,6 +1079,22 @@ static bool radeon_check_pot_argument(int arg) return (arg & (arg - 1)) == 0; } +/** + * Determine a sensible default GART size according to ASIC family. + * + * @family ASIC family name + */ +static int radeon_gart_size_auto(enum radeon_family family) +{ + /* default to a larger gart size on newer asics */ + if (family >= CHIP_TAHITI) + return 2048; + else if (family >= CHIP_RV770) + return 1024; + else + return 512; +} + /** * radeon_check_arguments - validate module params * @@ -1097,27 +1113,17 @@ static void radeon_check_arguments(struct radeon_device *rdev) } if (radeon_gart_size == -1) { - /* default to a larger gart size on newer asics */ - if (rdev->family >= CHIP_RV770) - radeon_gart_size = 1024; - else - radeon_gart_size = 512; + radeon_gart_size = radeon_gart_size_auto(rdev->family); } /* gtt size must be power of two and greater or equal to 32M */ if (radeon_gart_size < 32) { dev_warn(rdev->dev, "gart size (%d) too small\n", radeon_gart_size); - if (rdev->family >= CHIP_RV770) - radeon_gart_size = 1024; - else - radeon_gart_size = 512; + radeon_gart_size = radeon_gart_size_auto(rdev->family); } else if (!radeon_check_pot_argument(radeon_gart_size)) { dev_warn(rdev->dev, "gart size (%d) must be a power of 2\n", radeon_gart_size); - if (rdev->family >= CHIP_RV770) - radeon_gart_size = 1024; - else - radeon_gart_size = 512; + radeon_gart_size = radeon_gart_size_auto(rdev->family); } rdev->mc.gtt_size = (uint64_t)radeon_gart_size << 20; -- cgit From 866a92040392d844d151aa6a2ba15be2e6e5df4f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 3 Jul 2015 11:54:28 +0300 Subject: drm/radeon: fix underflow in r600_cp_dispatch_texture() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "if (pass_size > buf->total)" can underflow so I have changed the type of size and pass_size to unsigned to avoid this problem. Reviewed-by: Christian König Signed-off-by: Dan Carpenter Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/r600_cp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c index 09e3f39925fa..98f9adaccc3d 100644 --- a/drivers/gpu/drm/radeon/r600_cp.c +++ b/drivers/gpu/drm/radeon/r600_cp.c @@ -2483,7 +2483,7 @@ int r600_cp_dispatch_texture(struct drm_device *dev, struct drm_buf *buf; u32 *buffer; const u8 __user *data; - int size, pass_size; + unsigned int size, pass_size; u64 src_offset, dst_offset; if (!radeon_check_offset(dev_priv, tex->offset)) { -- cgit From 1b42804d27b1c2623309950e9b203b11f4c67f4f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 7 Jul 2015 18:00:49 +0100 Subject: arm64: entry: handle debug exceptions in el*_inv Currently we enable debug exceptions before reading ESR_EL1 in both el0_inv and el1_inv. If a debug exception is taken before we read ESR_EL1, the value will have been corrupted. As el*_inv is typically fatal, an intervening debug exception results in misleading debug information being logged to the console, but is not otherwise harmful. As with the other entry paths, we can use the ESR_EL1 value stashed earlier in the exception entry (in x25 for el0_sync{,_compat}, and x1 for el1_sync), giving us better error reporting in this case. Signed-off-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a7691a378668..f860bfda454a 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -352,8 +352,8 @@ el1_inv: // TODO: add support for undefined instructions in kernel mode enable_dbg mov x0, sp + mov x2, x1 mov x1, #BAD_SYNC - mrs x2, esr_el1 b bad_mode ENDPROC(el1_sync) @@ -553,7 +553,7 @@ el0_inv: ct_user_exit mov x0, sp mov x1, #BAD_SYNC - mrs x2, esr_el1 + mov x2, x25 bl bad_mode b ret_to_user ENDPROC(el0_sync) -- cgit From 3d8cc14152a4b661580761049c7b9711efa60d82 Mon Sep 17 00:00:00 2001 From: Y Vo Date: Fri, 26 Jun 2015 15:01:47 +0700 Subject: arm64: dts: Add poweroff button device node for APM X-Gene platform This patch adds poweroff button device node to support poweroff feature on APM X-Gene Mustang platform. Signed-off-by: Y Vo Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/apm/apm-mustang.dts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/boot/dts/apm/apm-mustang.dts b/arch/arm64/boot/dts/apm/apm-mustang.dts index 83578e766b94..4c55833d8a41 100644 --- a/arch/arm64/boot/dts/apm/apm-mustang.dts +++ b/arch/arm64/boot/dts/apm/apm-mustang.dts @@ -23,6 +23,16 @@ device_type = "memory"; reg = < 0x1 0x00000000 0x0 0x80000000 >; /* Updated by bootloader */ }; + + gpio-keys { + compatible = "gpio-keys"; + button@1 { + label = "POWER"; + linux,code = <116>; + linux,input-type = <0x1>; + interrupts = <0x0 0x2d 0x1>; + }; + }; }; &pcie0clk { -- cgit From efc5120b8259243fa945d0028450c0a7a5a4b9ef Mon Sep 17 00:00:00 2001 From: Tirumalesh Chalamarla Date: Fri, 26 Jun 2015 12:12:23 -0700 Subject: GICv3: Add ITS entry to THUNDER dts The PCIe host controller uses MSIs provided by GICv3 ITS. Enable it on Thunder SoCs by adding an entry to DT. Signed-off-by: Tirumalesh Chalamarla Acked-by: Marc Zyngier Signed-off-by: Kevin Hilman --- arch/arm64/boot/dts/cavium/thunder-88xx.dtsi | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi index d8c0bdc51882..9cb7cf94284a 100644 --- a/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi +++ b/arch/arm64/boot/dts/cavium/thunder-88xx.dtsi @@ -376,10 +376,19 @@ gic0: interrupt-controller@8010,00000000 { compatible = "arm,gic-v3"; #interrupt-cells = <3>; + #address-cells = <2>; + #size-cells = <2>; + ranges; interrupt-controller; reg = <0x8010 0x00000000 0x0 0x010000>, /* GICD */ <0x8010 0x80000000 0x0 0x600000>; /* GICR */ interrupts = <1 9 0xf04>; + + its: gic-its@8010,00020000 { + compatible = "arm,gic-v3-its"; + msi-controller; + reg = <0x8010 0x20000 0x0 0x200000>; + }; }; uaa0: serial@87e0,24000000 { -- cgit From 758556bdc1c8a8dffea0ea9f9df891878cc2468c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 9 Jul 2015 06:48:06 +0930 Subject: module: Fix load_module() error path The load_module() error path frees a module but forgot to take it out of the mod_tree, leaving a dangling entry in the tree, causing havoc. Cc: Mathieu Desnoyers Reported-by: Arthur Marsh Tested-by: Arthur Marsh Fixes: 93c2e105f6bc ("module: Optimize __module_address() using a latched RB-tree") Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Rusty Russell --- kernel/module.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/module.c b/kernel/module.c index 3e0e19763d24..4d2b82e610e2 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3557,6 +3557,7 @@ static int load_module(struct load_info *info, const char __user *uargs, mutex_lock(&module_mutex); /* Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); + mod_tree_remove(mod); wake_up_all(&module_wq); /* Wait for RCU-sched synchronizing before releasing mod->list. */ synchronize_sched(); -- cgit From 4d44f2a0266cdcc1226c7d94431ab1d57d0f6d53 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 1 Jul 2015 13:36:01 +0100 Subject: arm: dts: vexpress: describe all PMUs in TC2 dts The dts for the CoreTile Express A15x2 A7x3 (TC2) only describes the PMUs of the Cortex-A15 CPUs, and not the Cortex-A7 CPUs. Now that we have a mechanism for describing disparate PMUs and their interrupts in device tree, this patch makes use of these to describe the PMUs for all CPUs in the system. For consistency, the existing A15 PMU interrupt-affinity property is reflowed across two lines. Signed-off-by: Mark Rutland Acked-by: Will Deacon Acked-by: Sudeep Holla Cc: Liviu Dudau Cc: Lorenzo Pieralisi Signed-off-by: Kevin Hilman --- arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts index 107395c32d82..038e30e4332f 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts @@ -187,11 +187,22 @@ <1 10 0xf08>; }; - pmu { + pmu_a15 { compatible = "arm,cortex-a15-pmu"; interrupts = <0 68 4>, <0 69 4>; - interrupt-affinity = <&cpu0>, <&cpu1>; + interrupt-affinity = <&cpu0>, + <&cpu1>; + }; + + pmu_a7 { + compatible = "arm,cortex-a7-pmu"; + interrupts = <0 128 4>, + <0 129 4>, + <0 130 4>; + interrupt-affinity = <&cpu2>, + <&cpu3>, + <&cpu4>; }; oscclk6a: oscclk6a { -- cgit From 3adf7aaa76e720b52092e4bf8e14d8d231583af6 Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Wed, 1 Jul 2015 13:36:02 +0100 Subject: arm: dts: vexpress: add missing CCI PMU device node to TC2 The CCI device node was added to vexpress CA15_A7(i.e. TC2) much before the CCI PMU support and binding was added. This patch adds the missing PMU node so that CCI PMUs can be used on TC2. Cc: Liviu Dudau Cc: Lorenzo Pieralisi Acked-by: Punit Agrawal Signed-off-by: Sudeep Holla Signed-off-by: Kevin Hilman --- arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts index 038e30e4332f..17f63f7dfd9e 100644 --- a/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts +++ b/arch/arm/boot/dts/vexpress-v2p-ca15_a7.dts @@ -150,6 +150,16 @@ interface-type = "ace"; reg = <0x5000 0x1000>; }; + + pmu@9000 { + compatible = "arm,cci-400-pmu,r0"; + reg = <0x9000 0x5000>; + interrupts = <0 105 4>, + <0 101 4>, + <0 102 4>, + <0 103 4>, + <0 104 4>; + }; }; memory-controller@7ffd0000 { -- cgit From 9ccd608070b6d4fd1a89fd76a84184fe401fb6a8 Mon Sep 17 00:00:00 2001 From: Kristina Martsenko Date: Wed, 1 Jul 2015 13:36:03 +0100 Subject: arm64: dts: add device tree for ARM SMM-A53x2 on LogicTile Express 20MG Add a DTS file for the MP2 Cortex-A53 Soft Macrocell Model implemented on a LogicTile Express 20MG (V2F-1XV7) daughterboard. This is based on the version that's currently available from the ARM DTS repository [1]. [1] git://linux-arm.org/arm-dts.git Signed-off-by: Kristina Martsenko Acked-by: Sudeep Holla Signed-off-by: Kevin Hilman --- MAINTAINERS | 1 + arch/arm64/boot/dts/arm/Makefile | 1 + .../boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts | 191 +++++++++++++++++++++ 3 files changed, 193 insertions(+) create mode 100644 arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts diff --git a/MAINTAINERS b/MAINTAINERS index 8133cefb6b6e..3729a4a1d560 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1614,6 +1614,7 @@ M: Lorenzo Pieralisi L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers) S: Maintained F: arch/arm/boot/dts/vexpress* +F: arch/arm64/boot/dts/arm/vexpress* F: arch/arm/mach-vexpress/ F: */*/vexpress* F: */*/*/vexpress* diff --git a/arch/arm64/boot/dts/arm/Makefile b/arch/arm64/boot/dts/arm/Makefile index c5c98b91514e..bb3c07209676 100644 --- a/arch/arm64/boot/dts/arm/Makefile +++ b/arch/arm64/boot/dts/arm/Makefile @@ -1,6 +1,7 @@ dtb-$(CONFIG_ARCH_VEXPRESS) += foundation-v8.dtb dtb-$(CONFIG_ARCH_VEXPRESS) += juno.dtb juno-r1.dtb dtb-$(CONFIG_ARCH_VEXPRESS) += rtsm_ve-aemv8a.dtb +dtb-$(CONFIG_ARCH_VEXPRESS) += vexpress-v2f-1xv7-ca53x2.dtb always := $(dtb-y) subdir-y := $(dts-dirs) diff --git a/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts b/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts new file mode 100644 index 000000000000..5b1d0181023b --- /dev/null +++ b/arch/arm64/boot/dts/arm/vexpress-v2f-1xv7-ca53x2.dts @@ -0,0 +1,191 @@ +/* + * ARM Ltd. Versatile Express + * + * LogicTile Express 20MG + * V2F-1XV7 + * + * Cortex-A53 (2 cores) Soft Macrocell Model + * + * HBI-0247C + */ + +/dts-v1/; + +#include + +/ { + model = "V2F-1XV7 Cortex-A53x2 SMM"; + arm,hbi = <0x247>; + arm,vexpress,site = <0xf>; + compatible = "arm,vexpress,v2f-1xv7,ca53x2", "arm,vexpress,v2f-1xv7", "arm,vexpress"; + interrupt-parent = <&gic>; + #address-cells = <2>; + #size-cells = <2>; + + chosen { + stdout-path = "serial0:38400n8"; + }; + + aliases { + serial0 = &v2m_serial0; + serial1 = &v2m_serial1; + serial2 = &v2m_serial2; + serial3 = &v2m_serial3; + i2c0 = &v2m_i2c_dvi; + i2c1 = &v2m_i2c_pcie; + }; + + cpus { + #address-cells = <2>; + #size-cells = <0>; + + cpu@0 { + device_type = "cpu"; + compatible = "arm,cortex-a53", "arm,armv8"; + reg = <0 0>; + next-level-cache = <&L2_0>; + }; + + cpu@1 { + device_type = "cpu"; + compatible = "arm,cortex-a53", "arm,armv8"; + reg = <0 1>; + next-level-cache = <&L2_0>; + }; + + L2_0: l2-cache0 { + compatible = "cache"; + }; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0 0x80000000 0 0x80000000>; /* 2GB @ 2GB */ + }; + + gic: interrupt-controller@2c001000 { + compatible = "arm,gic-400"; + #interrupt-cells = <3>; + #address-cells = <0>; + interrupt-controller; + reg = <0 0x2c001000 0 0x1000>, + <0 0x2c002000 0 0x2000>, + <0 0x2c004000 0 0x2000>, + <0 0x2c006000 0 0x2000>; + interrupts = ; + }; + + timer { + compatible = "arm,armv8-timer"; + interrupts = , + , + , + ; + }; + + pmu { + compatible = "arm,armv8-pmuv3"; + interrupts = , + ; + }; + + dcc { + compatible = "arm,vexpress,config-bus"; + arm,vexpress,config-bridge = <&v2m_sysreg>; + + smbclk: osc@4 { + /* SMC clock */ + compatible = "arm,vexpress-osc"; + arm,vexpress-sysreg,func = <1 4>; + freq-range = <40000000 40000000>; + #clock-cells = <0>; + clock-output-names = "smclk"; + }; + + volt@0 { + /* VIO to expansion board above */ + compatible = "arm,vexpress-volt"; + arm,vexpress-sysreg,func = <2 0>; + regulator-name = "VIO_UP"; + regulator-min-microvolt = <800000>; + regulator-max-microvolt = <1800000>; + regulator-always-on; + }; + + volt@1 { + /* 12V from power connector J6 */ + compatible = "arm,vexpress-volt"; + arm,vexpress-sysreg,func = <2 1>; + regulator-name = "12"; + regulator-always-on; + }; + + temp@0 { + /* FPGA temperature */ + compatible = "arm,vexpress-temp"; + arm,vexpress-sysreg,func = <4 0>; + label = "FPGA"; + }; + }; + + smb { + compatible = "simple-bus"; + + #address-cells = <2>; + #size-cells = <1>; + ranges = <0 0 0 0x08000000 0x04000000>, + <1 0 0 0x14000000 0x04000000>, + <2 0 0 0x18000000 0x04000000>, + <3 0 0 0x1c000000 0x04000000>, + <4 0 0 0x0c000000 0x04000000>, + <5 0 0 0x10000000 0x04000000>; + + #interrupt-cells = <1>; + interrupt-map-mask = <0 0 63>; + interrupt-map = <0 0 0 &gic GIC_SPI 0 IRQ_TYPE_LEVEL_HIGH>, + <0 0 1 &gic GIC_SPI 1 IRQ_TYPE_LEVEL_HIGH>, + <0 0 2 &gic GIC_SPI 2 IRQ_TYPE_LEVEL_HIGH>, + <0 0 3 &gic GIC_SPI 3 IRQ_TYPE_LEVEL_HIGH>, + <0 0 4 &gic GIC_SPI 4 IRQ_TYPE_LEVEL_HIGH>, + <0 0 5 &gic GIC_SPI 5 IRQ_TYPE_LEVEL_HIGH>, + <0 0 6 &gic GIC_SPI 6 IRQ_TYPE_LEVEL_HIGH>, + <0 0 7 &gic GIC_SPI 7 IRQ_TYPE_LEVEL_HIGH>, + <0 0 8 &gic GIC_SPI 8 IRQ_TYPE_LEVEL_HIGH>, + <0 0 9 &gic GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>, + <0 0 10 &gic GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>, + <0 0 11 &gic GIC_SPI 11 IRQ_TYPE_LEVEL_HIGH>, + <0 0 12 &gic GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>, + <0 0 13 &gic GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>, + <0 0 14 &gic GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>, + <0 0 15 &gic GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>, + <0 0 16 &gic GIC_SPI 16 IRQ_TYPE_LEVEL_HIGH>, + <0 0 17 &gic GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>, + <0 0 18 &gic GIC_SPI 18 IRQ_TYPE_LEVEL_HIGH>, + <0 0 19 &gic GIC_SPI 19 IRQ_TYPE_LEVEL_HIGH>, + <0 0 20 &gic GIC_SPI 20 IRQ_TYPE_LEVEL_HIGH>, + <0 0 21 &gic GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>, + <0 0 22 &gic GIC_SPI 22 IRQ_TYPE_LEVEL_HIGH>, + <0 0 23 &gic GIC_SPI 23 IRQ_TYPE_LEVEL_HIGH>, + <0 0 24 &gic GIC_SPI 24 IRQ_TYPE_LEVEL_HIGH>, + <0 0 25 &gic GIC_SPI 25 IRQ_TYPE_LEVEL_HIGH>, + <0 0 26 &gic GIC_SPI 26 IRQ_TYPE_LEVEL_HIGH>, + <0 0 27 &gic GIC_SPI 27 IRQ_TYPE_LEVEL_HIGH>, + <0 0 28 &gic GIC_SPI 28 IRQ_TYPE_LEVEL_HIGH>, + <0 0 29 &gic GIC_SPI 29 IRQ_TYPE_LEVEL_HIGH>, + <0 0 30 &gic GIC_SPI 30 IRQ_TYPE_LEVEL_HIGH>, + <0 0 31 &gic GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>, + <0 0 32 &gic GIC_SPI 32 IRQ_TYPE_LEVEL_HIGH>, + <0 0 33 &gic GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>, + <0 0 34 &gic GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>, + <0 0 35 &gic GIC_SPI 35 IRQ_TYPE_LEVEL_HIGH>, + <0 0 36 &gic GIC_SPI 36 IRQ_TYPE_LEVEL_HIGH>, + <0 0 37 &gic GIC_SPI 37 IRQ_TYPE_LEVEL_HIGH>, + <0 0 38 &gic GIC_SPI 38 IRQ_TYPE_LEVEL_HIGH>, + <0 0 39 &gic GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>, + <0 0 40 &gic GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>, + <0 0 41 &gic GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>, + <0 0 42 &gic GIC_SPI 42 IRQ_TYPE_LEVEL_HIGH>; + + /include/ "../../../../arm/boot/dts/vexpress-v2m-rs1.dtsi" + }; +}; -- cgit From 673c2c34f684e9d4328459e426ab54d51a5865c5 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 8 Jul 2015 17:07:41 -0400 Subject: modpost: work correctly with tile coldtext sections The tilegx and tilepro compilers use .coldtext for their unlikely executed text section name, so an __attribute__((cold)) function will (when compiled with higher optimization levels) land in the .coldtext section. Modify modpost to add .coldtext to the set of OTHER_TEXT_SECTIONS so we don't get warnings about referencing such a section in an __ex_table block, and then also modify arch/tile/lib/memcpy_user_64.c so that it uses plain ".coldtext" instead of ".coldtext.memcpy". The latter naming is a relic of an earlier use of -ffunction-sections, which we no longer use by default. Signed-off-by: Chris Metcalf Acked-by: Rusty Russell --- arch/tile/lib/memcpy_user_64.c | 4 ++-- scripts/mod/modpost.c | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/tile/lib/memcpy_user_64.c b/arch/tile/lib/memcpy_user_64.c index 88c7016492c4..97bbb6060b25 100644 --- a/arch/tile/lib/memcpy_user_64.c +++ b/arch/tile/lib/memcpy_user_64.c @@ -28,7 +28,7 @@ #define _ST(p, inst, v) \ ({ \ asm("1: " #inst " %0, %1;" \ - ".pushsection .coldtext.memcpy,\"ax\";" \ + ".pushsection .coldtext,\"ax\";" \ "2: { move r0, %2; jrp lr };" \ ".section __ex_table,\"a\";" \ ".align 8;" \ @@ -41,7 +41,7 @@ ({ \ unsigned long __v; \ asm("1: " #inst " %0, %1;" \ - ".pushsection .coldtext.memcpy,\"ax\";" \ + ".pushsection .coldtext,\"ax\";" \ "2: { move r0, %2; jrp lr };" \ ".section __ex_table,\"a\";" \ ".align 8;" \ diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 91ee1b2e0f9a..12d3db3bd46b 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -886,7 +886,8 @@ static void check_section(const char *modname, struct elf_info *elf, #define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \ ".kprobes.text" #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \ - ".fixup", ".entry.text", ".exception.text", ".text.*" + ".fixup", ".entry.text", ".exception.text", ".text.*", \ + ".coldtext" #define INIT_SECTIONS ".init.*" #define MEM_INIT_SECTIONS ".meminit.*" -- cgit From 19ee835cdb0b5a8eb11a68f25a51b8039d564488 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 29 Jun 2015 14:01:19 +0100 Subject: drm/i915: Declare the swizzling unknown for L-shaped configurations The old style of memory interleaving swizzled upto the end of the first even bank of memory, and then used the remainder as unswizzled on the unpaired bank - i.e. swizzling is not constant for all memory. This causes problems when we try to migrate memory and so the kernel prevents migration at all when we detect L-shaped inconsistent swizzling. However, this issue also extends to userspace who try to manually detile into memory as the swizzling for an individual page is unknown (it depends on its physical address only known to the kernel), userspace cannot correctly swizzle objects. v2: Mark the global swizzling as unknown rather than adjust the value reported to userspace. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91105 Signed-off-by: Chris Wilson Cc: Daniel Vetter Cc: stable@vger.kernel.org Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_tiling.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 633bd1fcab69..d61e74a08f82 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -183,8 +183,18 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) if (IS_GEN4(dev)) { uint32_t ddc2 = I915_READ(DCC2); - if (!(ddc2 & DCC2_MODIFIED_ENHANCED_DISABLE)) + if (!(ddc2 & DCC2_MODIFIED_ENHANCED_DISABLE)) { + /* Since the swizzling may vary within an + * object, we have no idea what the swizzling + * is for any page in particular. Thus we + * cannot migrate tiled pages using the GPU, + * nor can we tell userspace what the exact + * swizzling is for any object. + */ dev_priv->quirks |= QUIRK_PIN_SWIZZLED_PAGES; + swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; + swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + } } if (dcc == 0xffffffff) { -- cgit From 52613921b31d8573a212a4b0854b390a18d9849c Mon Sep 17 00:00:00 2001 From: Ville Syrjälä Date: Mon, 29 Jun 2015 20:28:35 +0300 Subject: Revert "drm/i915: Allocate context objects from stolen" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stolen gets trashed during hibernation, so storing contexts there is not a very good idea. On my IVB machines this leads to a totally dead GPU on resume. A reboot is required to resurrect it. So let's not store contexts where they will get trampled. This reverts commit 149c86e74fe44dcbac5e9f8d145c5fbc5dc21261. Cc: Chris Wilson Signed-off-by: Ville Syrjälä Acked-by: Chris Wilson Signed-off-by: Daniel Vetter --- drivers/gpu/drm/i915/i915_gem_context.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index 8867818b1401..d65cbe6afb92 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -157,9 +157,7 @@ i915_gem_alloc_context_obj(struct drm_device *dev, size_t size) struct drm_i915_gem_object *obj; int ret; - obj = i915_gem_object_create_stolen(dev, size); - if (obj == NULL) - obj = i915_gem_alloc_object(dev, size); + obj = i915_gem_alloc_object(dev, size); if (obj == NULL) return ERR_PTR(-ENOMEM); -- cgit From 0ec62aaee919991d9e0e278f70776d560d95fc29 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 9 Jul 2015 10:51:46 +0200 Subject: cris: Replace do_posix_clock_monotonic_gettime() ktime_get_ts() is the proper interface today. Signed-off-by: Thomas Gleixner Cc: Jesper Nilsson --- arch/cris/arch-v32/drivers/sync_serial.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c index 4dda9bd6b8fb..e989cee77414 100644 --- a/arch/cris/arch-v32/drivers/sync_serial.c +++ b/arch/cris/arch-v32/drivers/sync_serial.c @@ -1464,7 +1464,7 @@ static inline void handle_rx_packet(struct sync_port *port) if (port->write_ts_idx == NBR_IN_DESCR) port->write_ts_idx = 0; idx = port->write_ts_idx++; - do_posix_clock_monotonic_gettime(&port->timestamp[idx]); + ktime_get_ts(&port->timestamp[idx]); port->in_buffer_len += port->inbufchunk; } spin_unlock_irqrestore(&port->lock, flags); -- cgit From 1f6823faa8c563431a94e614d2b63ce16bb6f658 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 9 Jul 2015 10:51:46 +0200 Subject: time: Get rid of do_posix_clock_monotonic_gettime All users gone. Remove it before we get another one. Signed-off-by: Thomas Gleixner --- include/linux/timekeeping.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 3aa72e648650..6e191e4e6ab6 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -145,7 +145,6 @@ static inline void getboottime(struct timespec *ts) } #endif -#define do_posix_clock_monotonic_gettime(ts) ktime_get_ts(ts) #define ktime_get_real_ts64(ts) getnstimeofday64(ts) /* -- cgit From a6335fa11e08fc386740557b2a4c1b7ff34bc499 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Thu, 2 Jul 2015 17:16:01 +0200 Subject: MIPS: bootmem: Don't use memory holes for page bitmap Commit f9a7febd leads to a fact that mapstart and therefore a page bitmap for bootmem allocator immediately follows initrd_end. This doesn't always work well on Octeon, where there are holes in PFN ranges (refer to 5b3b1688 and 4MB-aligned PFN allocation). Depending on the inird location it could happen, that mapstart would be in an area not allocated by plat_mem_setup() in arch/mips/cavium-octeon/setup.c, but in the alignment hole between initrd and the next PFN area. Later on this memory will be unconditionally made available to buddy allocator at the end of free_all_bootmem_core() (mm/bootmem.c). All of this results in Linux using the memory not designated for Linux in Octeon's plat_mem_setup(), which in turn means corruption of the memory used by another OS/baremetal code on the same SoC. It doesn't look to me as a problem of Octeon platform code, but rather as an inability of f9a7febd to deal correctly with the fragmented memory-mappings. Proposed fix moves the check for initrd address to the same calculation-loop in bootmem_init() (arch/mips/kernel/setup.c), which also accounts for kernel code location. This should result in mapstart located starting from the first PFN area after kernel code AND initrd. Signed-off-by: Alexander Sverdlin Cc: linux-mips@linux-mips.org Cc: David Daney Cc: Zubair Lutfullah Kakakhel Cc: Huacai Chen Cc: Andreas Herrmann Cc: Joe Perches Cc: Steven J. Hill Cc: Yusuf Khan Cc: Michael Kreuzer Cc: Aaro Koskinen Patchwork: https://patchwork.linux-mips.org/patch/10594/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/setup.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index be73c491182b..008b3378653a 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -337,6 +337,11 @@ static void __init bootmem_init(void) min_low_pfn = start; if (end <= reserved_end) continue; +#ifdef CONFIG_BLK_DEV_INITRD + /* mapstart should be after initrd_end */ + if (initrd_end && end <= (unsigned long)PFN_UP(__pa(initrd_end))) + continue; +#endif if (start >= mapstart) continue; mapstart = max(reserved_end, start); @@ -366,14 +371,6 @@ static void __init bootmem_init(void) max_low_pfn = PFN_DOWN(HIGHMEM_START); } -#ifdef CONFIG_BLK_DEV_INITRD - /* - * mapstart should be after initrd_end - */ - if (initrd_end) - mapstart = max(mapstart, (unsigned long)PFN_UP(__pa(initrd_end))); -#endif - /* * Initialize the boot-time allocator with low memory only. */ -- cgit From 761b4493bbb7b614387794f39e3969716e9e0215 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 24 Jun 2015 09:29:20 +0100 Subject: MIPS: kernel: traps: Fix broken indentation Fix broken indentation caused by the SMTC removal commit b633648c5ad3cfbda0b3daea50d2135d44899259 ("MIPS: MT: Remove SMTC support") Signed-off-by: Markos Chandras Fixes: b633648c5ad3c ("MIPS: MT: Remove SMTC support") Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10581/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/traps.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 2a7b38ed23f0..e207a43b5f8f 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2130,10 +2130,10 @@ void per_cpu_trap_init(bool is_boot_cpu) BUG_ON(current->mm); enter_lazy_tlb(&init_mm, current); - /* Boot CPU's cache setup in setup_arch(). */ - if (!is_boot_cpu) - cpu_cache_init(); - tlb_init(); + /* Boot CPU's cache setup in setup_arch(). */ + if (!is_boot_cpu) + cpu_cache_init(); + tlb_init(); TLBMISS_HANDLER_SETUP(); } -- cgit From e9d92d223381f1f3be5d87322b576721d3b93612 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 24 Jun 2015 09:52:00 +0100 Subject: MIPS: Fix branch emulation for BLTC and BGEC instructions Commits f1b44067c19258b7614e3cd09dfe8d8e12ff5895 ("MIPS: Emulate the new MIPS R6 B{L,G}T{Z,}{AL,}C instructions") and commit a8ff66f52d3f17b5ae793955270675c197f73d6c ("MIPS: Emulate the new MIPS R6 B{L,G}E{Z,}{AL,}C instructions") added support for emulating various branch compact instructions. However, it missed the case for those which use the old BLEZL and BGTZL opcodes leading to random crashes when the R6 emulator is disabled. We fix this by ensuring that the 'rt' field is not zero which is always true for these branch compact instructions. Fixes: f1b44067c192 ("MIPS: Emulate the new MIPS R6 B{L,G}T{Z,}{AL,}C instructions") Fixes: a8ff66f52d3f ("MIPS: Emulate the new MIPS R6 B{L,G}E{Z,}{AL,}C instructions") Cc: # 4.0+ Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: Markos Chandras Patchwork: https://patchwork.linux-mips.org/patch/10582/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/branch.c | 4 ++-- arch/mips/math-emu/cp1emu.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index c0c5e5972256..d8f9b357b222 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -600,7 +600,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, break; case blezl_op: /* not really i_format */ - if (NO_R6EMU) + if (!insn.i_format.rt && NO_R6EMU) goto sigill_r6; case blez_op: /* @@ -635,7 +635,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, break; case bgtzl_op: - if (NO_R6EMU) + if (!insn.i_format.rt && NO_R6EMU) goto sigill_r6; case bgtz_op: /* diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 22b9b2cb9219..00c241ae04ce 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -551,7 +551,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.next_pc_inc; return 1; case blezl_op: - if (NO_R6EMU) + if (!insn.i_format.rt && NO_R6EMU) break; case blez_op: @@ -588,7 +588,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, dec_insn.next_pc_inc; return 1; case bgtzl_op: - if (NO_R6EMU) + if (!insn.i_format.rt && NO_R6EMU) break; case bgtz_op: /* -- cgit From 143fefc8f315cd10e046e6860913c421c3385cb1 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 24 Jun 2015 09:52:01 +0100 Subject: MIPS: Fix erroneous JR emulation for MIPS R6 Commit 5f9f41c474befb4ebbc40b27f65bb7d649241581 ("MIPS: kernel: Prepare the JR instruction for emulation on MIPS R6") added support for emulating the JR instruction on MIPS R6 cores but that introduced a bug which could be triggered when hitting a JALR opcode because the code used the wrong field in the 'r_format' struct to determine the instruction opcode. This lead to crashes because an emulated JALR instruction was treated as a JR one when the R6 emulator was turned off. Fixes: 5f9f41c474be ("MIPS: kernel: Prepare the JR instruction for emulation on MIPS R6") Cc: # 4.0+ Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10583/ Signed-off-by: Ralf Baechle --- arch/mips/math-emu/cp1emu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 00c241ae04ce..712f17a2ecf2 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -451,7 +451,7 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, /* Fall through */ case jr_op: /* For R6, JR already emulated in jalr_op */ - if (NO_R6EMU && insn.r_format.opcode == jr_op) + if (NO_R6EMU && insn.r_format.func == jr_op) break; *contpc = regs->regs[insn.r_format.rs]; return 1; -- cgit From fd5ed3066bb2f47814fe53cdc56d11a678551ae1 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 1 Jul 2015 09:13:28 +0100 Subject: MIPS: kernel: smp-cps: Fix 64-bit compatibility errors due to pointer casting Commit 1d8f1f5a780a ("MIPS: smp-cps: hotplug support") added hotplug support in the SMP/CPS implementation but it introduced a few build problems on 64-bit kernels due to pointer being casted to and from 'int' C types. We fix this problem by using 'unsigned long' instead which should match the size of the pointers in 32/64-bit kernels. Finally, we fix the comment since the CM base address is loaded to v1($3) instead of v0. Fixes the following build problems: arch/mips/kernel/smp-cps.c: In function 'wait_for_sibling_halt': arch/mips/kernel/smp-cps.c:366:17: error: cast from pointer to integer of different size [-Werror=pointer-to-int-cast] [...] arch/mips/kernel/smp-cps.c: In function 'cps_cpu_die': arch/mips/kernel/smp-cps.c:427:13: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast] cc1: all warnings being treated as errors Fixes: 1d8f1f5a780a ("MIPS: smp-cps: hotplug support") Cc: # 3.16+ Reviewed-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10586/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/smp-cps.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index 4251d390b5b6..c88937745b4e 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -133,7 +133,7 @@ static void __init cps_prepare_cpus(unsigned int max_cpus) /* * Patch the start of mips_cps_core_entry to provide: * - * v0 = CM base address + * v1 = CM base address * s0 = kseg0 CCA */ entry_code = (u32 *)&mips_cps_core_entry; @@ -369,7 +369,7 @@ void play_dead(void) static void wait_for_sibling_halt(void *ptr_cpu) { - unsigned cpu = (unsigned)ptr_cpu; + unsigned cpu = (unsigned long)ptr_cpu; unsigned vpe_id = cpu_vpe_id(&cpu_data[cpu]); unsigned halted; unsigned long flags; @@ -430,7 +430,7 @@ static void cps_cpu_die(unsigned int cpu) */ err = smp_call_function_single(cpu_death_sibling, wait_for_sibling_halt, - (void *)cpu, 1); + (void *)(unsigned long)cpu, 1); if (err) panic("Failed to call remote sibling CPU\n"); } -- cgit From 81a02e34ded906357deac7003fbb0d36b6cc503f Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 1 Jul 2015 09:13:29 +0100 Subject: MIPS: kernel: cps-vec: Replace 'la' macro with PTR_LA The PTR_LA macro will pick the correct "la" or "dla" macro to load an address to a register. This gets rids of the following warnings (and others) when building a 64-bit CPS kernel: arch/mips/kernel/cps-vec.S:63: Warning: la used to load 64-bit address arch/mips/kernel/cps-vec.S:159: Warning: la used to load 64-bit address arch/mips/kernel/cps-vec.S:220: Warning: la used to load 64-bit address arch/mips/kernel/cps-vec.S:240: Warning: la used to load 64-bit address [...] Cc: # 3.16+ Reviewed-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10587/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cps-vec.S | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index 55b759a0019e..a4b2d81f45dd 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -60,7 +60,7 @@ LEAF(mips_cps_core_entry) nop /* This is an NMI */ - la k0, nmi_handler + PTR_LA k0, nmi_handler jr k0 nop @@ -156,7 +156,7 @@ dcache_done: ehb /* Jump to kseg0 */ - la t0, 1f + PTR_LA t0, 1f jr t0 nop @@ -217,7 +217,7 @@ LEAF(excep_intex) .org 0x480 LEAF(excep_ejtag) - la k0, ejtag_debug_handler + PTR_LA k0, ejtag_debug_handler jr k0 nop END(excep_ejtag) @@ -237,7 +237,7 @@ LEAF(mips_cps_core_init) /* ...and for the moment only 1 VPE */ dvpe - la t1, 1f + PTR_LA t1, 1f jr.hb t1 nop @@ -298,14 +298,14 @@ LEAF(mips_cps_core_init) LEAF(mips_cps_boot_vpes) /* Retrieve CM base address */ - la t0, mips_cm_base + PTR_LA t0, mips_cm_base lw t0, 0(t0) /* Calculate a pointer to this cores struct core_boot_config */ lw t0, GCR_CL_ID_OFS(t0) li t1, COREBOOTCFG_SIZE mul t0, t0, t1 - la t1, mips_cps_core_bootcfg + PTR_LA t1, mips_cps_core_bootcfg lw t1, 0(t1) addu t0, t0, t1 @@ -351,7 +351,7 @@ LEAF(mips_cps_boot_vpes) 1: /* Enter VPE configuration state */ dvpe - la t1, 1f + PTR_LA t1, 1f jr.hb t1 nop 1: mfc0 t1, CP0_MVPCONTROL @@ -445,7 +445,7 @@ LEAF(mips_cps_boot_vpes) /* This VPE should be offline, halt the TC */ li t0, TCHALT_H mtc0 t0, CP0_TCHALT - la t0, 1f + PTR_LA t0, 1f 1: jr.hb t0 nop @@ -466,10 +466,10 @@ LEAF(mips_cps_boot_vpes) .set noat lw $1, TI_CPU(gp) sll $1, $1, LONGLOG - la \dest, __per_cpu_offset + PTR_LA \dest, __per_cpu_offset addu $1, $1, \dest lw $1, 0($1) - la \dest, cps_cpu_state + PTR_LA \dest, cps_cpu_state addu \dest, \dest, $1 .set pop .endm -- cgit From 977e043d5ea1270ce985e4c165724ff91dc3c3e2 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 1 Jul 2015 09:13:30 +0100 Subject: MIPS: kernel: cps-vec: Replace mips32r2 ISA level with mips64r2 mips32r2 is a subset of mips64r2, so we replace mips32r2 with mips64r2 in preparation for 64-bit CPS support. Cc: # 3.16+ Reviewed-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10588/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cps-vec.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index a4b2d81f45dd..bbbd88e994f0 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -229,7 +229,7 @@ LEAF(mips_cps_core_init) nop .set push - .set mips32r2 + .set mips64r2 .set mt /* Only allow 1 TC per VPE to execute... */ @@ -346,7 +346,7 @@ LEAF(mips_cps_boot_vpes) nop .set push - .set mips32r2 + .set mips64r2 .set mt 1: /* Enter VPE configuration state */ -- cgit From 0586ac75cd0746a4d5c43372dabcea8739ae0176 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 1 Jul 2015 09:13:31 +0100 Subject: MIPS: kernel: cps-vec: Use ta0-ta3 pseudo-registers for 64-bit The cps-vec code assumes O32 ABI and uses t4-t7 in quite a few places. This breaks the build on 64-bit. As a result of which, use the pseudo-registers ta0-ta3 to make the code compatible with 64-bit. Cc: # 3.16+ Reviewed-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10589/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cps-vec.S | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index bbbd88e994f0..21f714a81ebd 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -250,25 +250,25 @@ LEAF(mips_cps_core_init) mfc0 t0, CP0_MVPCONF0 srl t0, t0, MVPCONF0_PVPE_SHIFT andi t0, t0, (MVPCONF0_PVPE >> MVPCONF0_PVPE_SHIFT) - addiu t7, t0, 1 + addiu ta3, t0, 1 /* If there's only 1, we're done */ beqz t0, 2f nop /* Loop through each VPE within this core */ - li t5, 1 + li ta1, 1 1: /* Operate on the appropriate TC */ - mtc0 t5, CP0_VPECONTROL + mtc0 ta1, CP0_VPECONTROL ehb /* Bind TC to VPE (1:1 TC:VPE mapping) */ - mttc0 t5, CP0_TCBIND + mttc0 ta1, CP0_TCBIND /* Set exclusive TC, non-active, master */ li t0, VPECONF0_MVP - sll t1, t5, VPECONF0_XTC_SHIFT + sll t1, ta1, VPECONF0_XTC_SHIFT or t0, t0, t1 mttc0 t0, CP0_VPECONF0 @@ -280,8 +280,8 @@ LEAF(mips_cps_core_init) mttc0 t0, CP0_TCHALT /* Next VPE */ - addiu t5, t5, 1 - slt t0, t5, t7 + addiu ta1, ta1, 1 + slt t0, ta1, ta3 bnez t0, 1b nop @@ -310,7 +310,7 @@ LEAF(mips_cps_boot_vpes) addu t0, t0, t1 /* Calculate this VPEs ID. If the core doesn't support MT use 0 */ - has_mt t6, 1f + has_mt ta2, 1f li t9, 0 /* Find the number of VPEs present in the core */ @@ -334,13 +334,13 @@ LEAF(mips_cps_boot_vpes) 1: /* Calculate a pointer to this VPEs struct vpe_boot_config */ li t1, VPEBOOTCFG_SIZE mul v0, t9, t1 - lw t7, COREBOOTCFG_VPECONFIG(t0) - addu v0, v0, t7 + lw ta3, COREBOOTCFG_VPECONFIG(t0) + addu v0, v0, ta3 #ifdef CONFIG_MIPS_MT /* If the core doesn't support MT then return */ - bnez t6, 1f + bnez ta2, 1f nop jr ra nop @@ -360,12 +360,12 @@ LEAF(mips_cps_boot_vpes) ehb /* Loop through each VPE */ - lw t6, COREBOOTCFG_VPEMASK(t0) - move t8, t6 - li t5, 0 + lw ta2, COREBOOTCFG_VPEMASK(t0) + move t8, ta2 + li ta1, 0 /* Check whether the VPE should be running. If not, skip it */ -1: andi t0, t6, 1 +1: andi t0, ta2, 1 beqz t0, 2f nop @@ -373,7 +373,7 @@ LEAF(mips_cps_boot_vpes) mfc0 t0, CP0_VPECONTROL ori t0, t0, VPECONTROL_TARGTC xori t0, t0, VPECONTROL_TARGTC - or t0, t0, t5 + or t0, t0, ta1 mtc0 t0, CP0_VPECONTROL ehb @@ -384,8 +384,8 @@ LEAF(mips_cps_boot_vpes) /* Calculate a pointer to the VPEs struct vpe_boot_config */ li t0, VPEBOOTCFG_SIZE - mul t0, t0, t5 - addu t0, t0, t7 + mul t0, t0, ta1 + addu t0, t0, ta3 /* Set the TC restart PC */ lw t1, VPEBOOTCFG_PC(t0) @@ -423,9 +423,9 @@ LEAF(mips_cps_boot_vpes) mttc0 t0, CP0_VPECONF0 /* Next VPE */ -2: srl t6, t6, 1 - addiu t5, t5, 1 - bnez t6, 1b +2: srl ta2, ta2, 1 + addiu ta1, ta1, 1 + bnez ta2, 1b nop /* Leave VPE configuration state */ -- cgit From 717f14255a52ad445d6f0eca7d0f22f59d6ba1f8 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 1 Jul 2015 09:13:32 +0100 Subject: MIPS: kernel: cps-vec: Replace KSEG0 with CKSEG0 In preparation for 64-bit CPS support, we replace KSEG0 with CKSEG0 so 64-bit kernels can be supported. Cc: # 3.16+ Reviewed-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10590/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cps-vec.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index 21f714a81ebd..2f95568e0da5 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -107,7 +107,7 @@ not_nmi: mul t1, t1, t0 mul t1, t1, t2 - li a0, KSEG0 + li a0, CKSEG0 add a1, a0, t1 1: cache Index_Store_Tag_I, 0(a0) add a0, a0, t0 @@ -134,7 +134,7 @@ icache_done: mul t1, t1, t0 mul t1, t1, t2 - li a0, KSEG0 + li a0, CKSEG0 addu a1, a0, t1 subu a1, a1, t0 1: cache Index_Store_Tag_D, 0(a0) -- cgit From b677bc03d757c7d749527cccdd2afcf34ebeeb07 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 1 Jul 2015 09:13:33 +0100 Subject: MIPS: cps-vec: Use macros for various arithmetics and memory operations Replace lw/sw and various arithmetic instructions with macros so the code can work on 64-bit kernels as well. Cc: # 3.16+ Reviewed-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10591/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/cps-vec.S | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S index 2f95568e0da5..1b6ca634e646 100644 --- a/arch/mips/kernel/cps-vec.S +++ b/arch/mips/kernel/cps-vec.S @@ -108,9 +108,9 @@ not_nmi: mul t1, t1, t2 li a0, CKSEG0 - add a1, a0, t1 + PTR_ADD a1, a0, t1 1: cache Index_Store_Tag_I, 0(a0) - add a0, a0, t0 + PTR_ADD a0, a0, t0 bne a0, a1, 1b nop icache_done: @@ -135,11 +135,11 @@ icache_done: mul t1, t1, t2 li a0, CKSEG0 - addu a1, a0, t1 - subu a1, a1, t0 + PTR_ADDU a1, a0, t1 + PTR_SUBU a1, a1, t0 1: cache Index_Store_Tag_D, 0(a0) bne a0, a1, 1b - add a0, a0, t0 + PTR_ADD a0, a0, t0 dcache_done: /* Set Kseg0 CCA to that in s0 */ @@ -152,7 +152,7 @@ dcache_done: /* Enter the coherent domain */ li t0, 0xff - sw t0, GCR_CL_COHERENCE_OFS(v1) + PTR_S t0, GCR_CL_COHERENCE_OFS(v1) ehb /* Jump to kseg0 */ @@ -178,9 +178,9 @@ dcache_done: nop /* Off we go! */ - lw t1, VPEBOOTCFG_PC(v0) - lw gp, VPEBOOTCFG_GP(v0) - lw sp, VPEBOOTCFG_SP(v0) + PTR_L t1, VPEBOOTCFG_PC(v0) + PTR_L gp, VPEBOOTCFG_GP(v0) + PTR_L sp, VPEBOOTCFG_SP(v0) jr t1 nop END(mips_cps_core_entry) @@ -299,15 +299,15 @@ LEAF(mips_cps_core_init) LEAF(mips_cps_boot_vpes) /* Retrieve CM base address */ PTR_LA t0, mips_cm_base - lw t0, 0(t0) + PTR_L t0, 0(t0) /* Calculate a pointer to this cores struct core_boot_config */ - lw t0, GCR_CL_ID_OFS(t0) + PTR_L t0, GCR_CL_ID_OFS(t0) li t1, COREBOOTCFG_SIZE mul t0, t0, t1 PTR_LA t1, mips_cps_core_bootcfg - lw t1, 0(t1) - addu t0, t0, t1 + PTR_L t1, 0(t1) + PTR_ADDU t0, t0, t1 /* Calculate this VPEs ID. If the core doesn't support MT use 0 */ has_mt ta2, 1f @@ -334,8 +334,8 @@ LEAF(mips_cps_boot_vpes) 1: /* Calculate a pointer to this VPEs struct vpe_boot_config */ li t1, VPEBOOTCFG_SIZE mul v0, t9, t1 - lw ta3, COREBOOTCFG_VPECONFIG(t0) - addu v0, v0, ta3 + PTR_L ta3, COREBOOTCFG_VPECONFIG(t0) + PTR_ADDU v0, v0, ta3 #ifdef CONFIG_MIPS_MT @@ -360,7 +360,7 @@ LEAF(mips_cps_boot_vpes) ehb /* Loop through each VPE */ - lw ta2, COREBOOTCFG_VPEMASK(t0) + PTR_L ta2, COREBOOTCFG_VPEMASK(t0) move t8, ta2 li ta1, 0 -- cgit From 1c885357da2d3cf62132e611c0beaf4cdf607dd9 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Wed, 1 Jul 2015 09:31:14 +0100 Subject: Revert "MIPS: Kconfig: Disable SMP/CPS for 64-bit" This reverts commit 6ca716f2e5571d25a3899c6c5c91ff72ea6d6f5e. SMP/CPS is now supported on 64bit cores. Cc: # 4.1 Reviewed-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Cc: stable@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/10592/ Signed-off-by: Ralf Baechle --- arch/mips/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 2a14585c90d2..aab7e46cadd5 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2231,7 +2231,7 @@ config MIPS_CMP config MIPS_CPS bool "MIPS Coherent Processing System support" - depends on SYS_SUPPORTS_MIPS_CPS && !64BIT + depends on SYS_SUPPORTS_MIPS_CPS select MIPS_CM select MIPS_CPC select MIPS_CPS_PM if HOTPLUG_CPU -- cgit From a0f67441b06525a1e5fd713ba0d75af4e5d6b198 Mon Sep 17 00:00:00 2001 From: Maninder Singh Date: Thu, 9 Jul 2015 14:41:53 +0530 Subject: drm/amdkfd: validate pdd where it acquired first Currently pdd is validate after dereferencing it, which is not correct, Thus validate pdd before its first use. Signed-off-by: Maninder Singh Signed-off-by: Oded Gabbay --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 8a1f999daa24..9be007081b72 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -420,6 +420,12 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) pqm_uninit(&p->pqm); pdd = kfd_get_process_device_data(dev, p); + + if (!pdd) { + mutex_unlock(&p->mutex); + return; + } + if (pdd->reset_wavefronts) { dbgdev_wave_reset_wavefronts(pdd->dev, p); pdd->reset_wavefronts = false; @@ -431,8 +437,7 @@ void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid) * We don't call amd_iommu_unbind_pasid() here * because the IOMMU called us. */ - if (pdd) - pdd->bound = false; + pdd->bound = false; mutex_unlock(&p->mutex); } -- cgit From cd404af0c930104462aa91344f07d002cf8248ed Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Tue, 7 Jul 2015 16:27:28 +0900 Subject: drm/radeon: Clean up reference counting and pinning of the cursor BOs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Take a GEM reference for and pin the new cursor BO, unpin and drop the GEM reference for the old cursor BO in radeon_crtc_cursor_set2, and use radeon_crtc->cursor_addr in radeon_set_cursor. This fixes radeon_cursor_reset accidentally incrementing the cursor BO pin count, and cleans up the code a little. Cc: stable@vger.kernel.org Reviewed-by: Grigori Goronzy Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_cursor.c | 84 +++++++++++++++------------------- drivers/gpu/drm/radeon/radeon_mode.h | 1 - 2 files changed, 37 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index 45e54060ee97..fa661744a1f5 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -205,8 +205,9 @@ static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y) | (x << 16) | y)); /* offset is from DISP(2)_BASE_ADDRESS */ - WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, (radeon_crtc->legacy_cursor_offset + - (yorigin * 256))); + WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, + radeon_crtc->cursor_addr - radeon_crtc->legacy_display_base_addr + + yorigin * 256); } radeon_crtc->cursor_x = x; @@ -227,51 +228,32 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, return ret; } -static int radeon_set_cursor(struct drm_crtc *crtc, struct drm_gem_object *obj) +static void radeon_set_cursor(struct drm_crtc *crtc) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct radeon_device *rdev = crtc->dev->dev_private; - struct radeon_bo *robj = gem_to_radeon_bo(obj); - uint64_t gpu_addr; - int ret; - - ret = radeon_bo_reserve(robj, false); - if (unlikely(ret != 0)) - goto fail; - /* Only 27 bit offset for legacy cursor */ - ret = radeon_bo_pin_restricted(robj, RADEON_GEM_DOMAIN_VRAM, - ASIC_IS_AVIVO(rdev) ? 0 : 1 << 27, - &gpu_addr); - radeon_bo_unreserve(robj); - if (ret) - goto fail; if (ASIC_IS_DCE4(rdev)) { WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, - upper_32_bits(gpu_addr)); + upper_32_bits(radeon_crtc->cursor_addr)); WREG32(EVERGREEN_CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, - gpu_addr & 0xffffffff); + lower_32_bits(radeon_crtc->cursor_addr)); } else if (ASIC_IS_AVIVO(rdev)) { if (rdev->family >= CHIP_RV770) { if (radeon_crtc->crtc_id) - WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr)); + WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, + upper_32_bits(radeon_crtc->cursor_addr)); else - WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, upper_32_bits(gpu_addr)); + WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, + upper_32_bits(radeon_crtc->cursor_addr)); } WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, - gpu_addr & 0xffffffff); + lower_32_bits(radeon_crtc->cursor_addr)); } else { - radeon_crtc->legacy_cursor_offset = gpu_addr - radeon_crtc->legacy_display_base_addr; /* offset is from DISP(2)_BASE_ADDRESS */ - WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, radeon_crtc->legacy_cursor_offset); + WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, + radeon_crtc->cursor_addr - radeon_crtc->legacy_display_base_addr); } - - return 0; - -fail: - drm_gem_object_unreference_unlocked(obj); - - return ret; } int radeon_crtc_cursor_set2(struct drm_crtc *crtc, @@ -283,7 +265,9 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, int32_t hot_y) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + struct radeon_device *rdev = crtc->dev->dev_private; struct drm_gem_object *obj; + struct radeon_bo *robj; int ret; if (!handle) { @@ -305,6 +289,23 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, return -ENOENT; } + robj = gem_to_radeon_bo(obj); + ret = radeon_bo_reserve(robj, false); + if (ret != 0) { + drm_gem_object_unreference_unlocked(obj); + return ret; + } + /* Only 27 bit offset for legacy cursor */ + ret = radeon_bo_pin_restricted(robj, RADEON_GEM_DOMAIN_VRAM, + ASIC_IS_AVIVO(rdev) ? 0 : 1 << 27, + &radeon_crtc->cursor_addr); + radeon_bo_unreserve(robj); + if (ret) { + DRM_ERROR("Failed to pin new cursor BO (%d)\n", ret); + drm_gem_object_unreference_unlocked(obj); + return ret; + } + radeon_crtc->cursor_width = width; radeon_crtc->cursor_height = height; @@ -323,13 +324,8 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, radeon_crtc->cursor_hot_y = hot_y; } - ret = radeon_set_cursor(crtc, obj); - - if (ret) - DRM_ERROR("radeon_set_cursor returned %d, not changing cursor\n", - ret); - else - radeon_show_cursor(crtc); + radeon_set_cursor(crtc); + radeon_show_cursor(crtc); radeon_lock_cursor(crtc, false); @@ -341,8 +337,7 @@ unpin: radeon_bo_unpin(robj); radeon_bo_unreserve(robj); } - if (radeon_crtc->cursor_bo != obj) - drm_gem_object_unreference_unlocked(radeon_crtc->cursor_bo); + drm_gem_object_unreference_unlocked(radeon_crtc->cursor_bo); } radeon_crtc->cursor_bo = obj; @@ -360,7 +355,6 @@ unpin: void radeon_cursor_reset(struct drm_crtc *crtc) { struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - int ret; if (radeon_crtc->cursor_bo) { radeon_lock_cursor(crtc, true); @@ -368,12 +362,8 @@ void radeon_cursor_reset(struct drm_crtc *crtc) radeon_cursor_move_locked(crtc, radeon_crtc->cursor_x, radeon_crtc->cursor_y); - ret = radeon_set_cursor(crtc, radeon_crtc->cursor_bo); - if (ret) - DRM_ERROR("radeon_set_cursor returned %d, not showing " - "cursor\n", ret); - else - radeon_show_cursor(crtc); + radeon_set_cursor(crtc); + radeon_show_cursor(crtc); radeon_lock_cursor(crtc, false); } diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h index 6de5459316b5..07909d817381 100644 --- a/drivers/gpu/drm/radeon/radeon_mode.h +++ b/drivers/gpu/drm/radeon/radeon_mode.h @@ -343,7 +343,6 @@ struct radeon_crtc { int max_cursor_width; int max_cursor_height; uint32_t legacy_display_base_addr; - uint32_t legacy_cursor_offset; enum radeon_rmx_type rmx_type; u8 h_border; u8 v_border; -- cgit From f3cbb17bcf676a2fc6aedebe9fbebd59e550c51a Mon Sep 17 00:00:00 2001 From: Grigori Goronzy Date: Tue, 7 Jul 2015 16:27:29 +0900 Subject: drm/radeon: unpin cursor BOs on suspend and pin them again on resume (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Everything is evicted from VRAM before suspend, so we need to make sure all BOs are unpinned and re-pinned after resume. Fixes broken mouse cursor after resume introduced by commit b9729b17. [Michel Dänzer: Add pinning BOs on resume] v2: [Alex Deucher: merge cursor unpin into fb unpin loop] Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=100541 Cc: stable@vger.kernel.org Reviewed-by: Christian König (v1) Signed-off-by: Grigori Goronzy Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_device.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 1a532a9bc3a3..d8319dae8358 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -1578,11 +1578,21 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon) drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF); } - /* unpin the front buffers */ + /* unpin the front buffers and cursors */ list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); struct radeon_framebuffer *rfb = to_radeon_framebuffer(crtc->primary->fb); struct radeon_bo *robj; + if (radeon_crtc->cursor_bo) { + struct radeon_bo *robj = gem_to_radeon_bo(radeon_crtc->cursor_bo); + r = radeon_bo_reserve(robj, false); + if (r == 0) { + radeon_bo_unpin(robj); + radeon_bo_unreserve(robj); + } + } + if (rfb == NULL || rfb->obj == NULL) { continue; } @@ -1645,6 +1655,7 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon) { struct drm_connector *connector; struct radeon_device *rdev = dev->dev_private; + struct drm_crtc *crtc; int r; if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) @@ -1684,6 +1695,27 @@ int radeon_resume_kms(struct drm_device *dev, bool resume, bool fbcon) radeon_restore_bios_scratch_regs(rdev); + /* pin cursors */ + list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) { + struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); + + if (radeon_crtc->cursor_bo) { + struct radeon_bo *robj = gem_to_radeon_bo(radeon_crtc->cursor_bo); + r = radeon_bo_reserve(robj, false); + if (r == 0) { + /* Only 27 bit offset for legacy cursor */ + r = radeon_bo_pin_restricted(robj, + RADEON_GEM_DOMAIN_VRAM, + ASIC_IS_AVIVO(rdev) ? + 0 : 1 << 27, + &radeon_crtc->cursor_addr); + if (r != 0) + DRM_ERROR("Failed to pin cursor BO (%d)\n", r); + radeon_bo_unreserve(robj); + } + } + } + /* init dig PHYs, disp eng pll */ if (rdev->is_atom_bios) { radeon_atom_encoder_init(rdev); -- cgit From 8991668ab4e26f985a8485719bce5d6d0623a644 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Tue, 7 Jul 2015 16:27:30 +0900 Subject: drm/radeon: Fold radeon_set_cursor() into radeon_show_cursor() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Grigori Goronzy Reviewed-by: Christian König Signed-off-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_cursor.c | 49 +++++++++++++--------------------- 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c index fa661744a1f5..afaf346bd50e 100644 --- a/drivers/gpu/drm/radeon/radeon_cursor.c +++ b/drivers/gpu/drm/radeon/radeon_cursor.c @@ -91,15 +91,34 @@ static void radeon_show_cursor(struct drm_crtc *crtc) struct radeon_device *rdev = crtc->dev->dev_private; if (ASIC_IS_DCE4(rdev)) { + WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, + upper_32_bits(radeon_crtc->cursor_addr)); + WREG32(EVERGREEN_CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, + lower_32_bits(radeon_crtc->cursor_addr)); WREG32(RADEON_MM_INDEX, EVERGREEN_CUR_CONTROL + radeon_crtc->crtc_offset); WREG32(RADEON_MM_DATA, EVERGREEN_CURSOR_EN | EVERGREEN_CURSOR_MODE(EVERGREEN_CURSOR_24_8_PRE_MULT) | EVERGREEN_CURSOR_URGENT_CONTROL(EVERGREEN_CURSOR_URGENT_1_2)); } else if (ASIC_IS_AVIVO(rdev)) { + if (rdev->family >= CHIP_RV770) { + if (radeon_crtc->crtc_id) + WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, + upper_32_bits(radeon_crtc->cursor_addr)); + else + WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, + upper_32_bits(radeon_crtc->cursor_addr)); + } + + WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, + lower_32_bits(radeon_crtc->cursor_addr)); WREG32(RADEON_MM_INDEX, AVIVO_D1CUR_CONTROL + radeon_crtc->crtc_offset); WREG32(RADEON_MM_DATA, AVIVO_D1CURSOR_EN | (AVIVO_D1CURSOR_MODE_24BPP << AVIVO_D1CURSOR_MODE_SHIFT)); } else { + /* offset is from DISP(2)_BASE_ADDRESS */ + WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, + radeon_crtc->cursor_addr - radeon_crtc->legacy_display_base_addr); + switch (radeon_crtc->crtc_id) { case 0: WREG32(RADEON_MM_INDEX, RADEON_CRTC_GEN_CNTL); @@ -228,34 +247,6 @@ int radeon_crtc_cursor_move(struct drm_crtc *crtc, return ret; } -static void radeon_set_cursor(struct drm_crtc *crtc) -{ - struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc); - struct radeon_device *rdev = crtc->dev->dev_private; - - if (ASIC_IS_DCE4(rdev)) { - WREG32(EVERGREEN_CUR_SURFACE_ADDRESS_HIGH + radeon_crtc->crtc_offset, - upper_32_bits(radeon_crtc->cursor_addr)); - WREG32(EVERGREEN_CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, - lower_32_bits(radeon_crtc->cursor_addr)); - } else if (ASIC_IS_AVIVO(rdev)) { - if (rdev->family >= CHIP_RV770) { - if (radeon_crtc->crtc_id) - WREG32(R700_D2CUR_SURFACE_ADDRESS_HIGH, - upper_32_bits(radeon_crtc->cursor_addr)); - else - WREG32(R700_D1CUR_SURFACE_ADDRESS_HIGH, - upper_32_bits(radeon_crtc->cursor_addr)); - } - WREG32(AVIVO_D1CUR_SURFACE_ADDRESS + radeon_crtc->crtc_offset, - lower_32_bits(radeon_crtc->cursor_addr)); - } else { - /* offset is from DISP(2)_BASE_ADDRESS */ - WREG32(RADEON_CUR_OFFSET + radeon_crtc->crtc_offset, - radeon_crtc->cursor_addr - radeon_crtc->legacy_display_base_addr); - } -} - int radeon_crtc_cursor_set2(struct drm_crtc *crtc, struct drm_file *file_priv, uint32_t handle, @@ -324,7 +315,6 @@ int radeon_crtc_cursor_set2(struct drm_crtc *crtc, radeon_crtc->cursor_hot_y = hot_y; } - radeon_set_cursor(crtc); radeon_show_cursor(crtc); radeon_lock_cursor(crtc, false); @@ -362,7 +352,6 @@ void radeon_cursor_reset(struct drm_crtc *crtc) radeon_cursor_move_locked(crtc, radeon_crtc->cursor_x, radeon_crtc->cursor_y); - radeon_set_cursor(crtc); radeon_show_cursor(crtc); radeon_lock_cursor(crtc, false); -- cgit From d57c0edfe00d3274b50f91ce3076ed0e82d28782 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 Jul 2015 14:08:12 -0400 Subject: Revert "Revert "drm/radeon: dont switch vt on suspend"" This reverts commit ac9134906b3f5c2b45dc80dab0fee792bd516d52. We've fixed the underlying problem with cursors, so re-enable this. --- drivers/gpu/drm/radeon/radeon_fb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index 634793ea8418..aeb676708e60 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -257,6 +257,7 @@ static int radeonfb_create(struct drm_fb_helper *helper, } info->par = rfbdev; + info->skip_vt_switch = true; ret = radeon_framebuffer_init(rdev->ddev, &rfbdev->rfb, &mode_cmd, gobj); if (ret) { -- cgit From eb99070b4ad105f8a516d99e95226180924eddc8 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 8 Jul 2015 09:56:13 +0200 Subject: drm/radeon: allways add the VM clear duplicate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to allways add the VM clear duplicate of the BO_VA, no matter what the old status was. Signed-off-by: Christian König Test-by: hadack@gmx.de Tested-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_vm.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index ec10533a49b8..031096599f84 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -493,29 +493,27 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, } if (bo_va->it.start || bo_va->it.last) { - spin_lock(&vm->status_lock); - if (list_empty(&bo_va->vm_status)) { - /* add a clone of the bo_va to clear the old address */ - struct radeon_bo_va *tmp; - spin_unlock(&vm->status_lock); - tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); - if (!tmp) { - mutex_unlock(&vm->mutex); - r = -ENOMEM; - goto error_unreserve; - } - tmp->it.start = bo_va->it.start; - tmp->it.last = bo_va->it.last; - tmp->vm = vm; - tmp->bo = radeon_bo_ref(bo_va->bo); - spin_lock(&vm->status_lock); - list_add(&tmp->vm_status, &vm->freed); + /* add a clone of the bo_va to clear the old address */ + struct radeon_bo_va *tmp; + tmp = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL); + if (!tmp) { + mutex_unlock(&vm->mutex); + r = -ENOMEM; + goto error_unreserve; } - spin_unlock(&vm->status_lock); + tmp->it.start = bo_va->it.start; + tmp->it.last = bo_va->it.last; + tmp->vm = vm; + tmp->bo = radeon_bo_ref(bo_va->bo); interval_tree_remove(&bo_va->it, &vm->va); bo_va->it.start = 0; bo_va->it.last = 0; + + spin_lock(&vm->status_lock); + list_del_init(&bo_va->vm_status); + list_add(&tmp->vm_status, &vm->freed); + spin_unlock(&vm->status_lock); } if (soffset || eoffset) { -- cgit From dbedff05d163f8d1b4a97b91c57b939830fc235f Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 8 Jul 2015 09:56:14 +0200 Subject: drm/radeon: check if BO_VA is set before adding it to the invalidation list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Otherwise we try to clear BO_VAs without an address. Fixes: https://bugs.freedesktop.org/show_bug.cgi?id=91141 Signed-off-by: Christian König Test-by: hadack@gmx.de Tested-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/radeon_vm.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c index 031096599f84..48d97c040f49 100644 --- a/drivers/gpu/drm/radeon/radeon_vm.c +++ b/drivers/gpu/drm/radeon/radeon_vm.c @@ -507,22 +507,21 @@ int radeon_vm_bo_set_addr(struct radeon_device *rdev, tmp->bo = radeon_bo_ref(bo_va->bo); interval_tree_remove(&bo_va->it, &vm->va); + spin_lock(&vm->status_lock); bo_va->it.start = 0; bo_va->it.last = 0; - - spin_lock(&vm->status_lock); list_del_init(&bo_va->vm_status); list_add(&tmp->vm_status, &vm->freed); spin_unlock(&vm->status_lock); } if (soffset || eoffset) { + spin_lock(&vm->status_lock); bo_va->it.start = soffset; bo_va->it.last = eoffset - 1; - interval_tree_insert(&bo_va->it, &vm->va); - spin_lock(&vm->status_lock); list_add(&bo_va->vm_status, &vm->cleared); spin_unlock(&vm->status_lock); + interval_tree_insert(&bo_va->it, &vm->va); } bo_va->flags = flags; @@ -1156,7 +1155,8 @@ void radeon_vm_bo_invalidate(struct radeon_device *rdev, list_for_each_entry(bo_va, &bo->va, bo_list) { spin_lock(&bo_va->vm->status_lock); - if (list_empty(&bo_va->vm_status)) + if (list_empty(&bo_va->vm_status) && + (bo_va->it.start || bo_va->it.last)) list_add(&bo_va->vm_status, &bo_va->vm->invalidated); spin_unlock(&bo_va->vm->status_lock); } -- cgit From 0f11770417d849558c727ed20e1cd07444e15a00 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 8 Jul 2015 16:58:48 +0200 Subject: drm/amdgpu: fix timeout calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christian König Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 975edb1000a2..ae43b58c9733 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -352,7 +352,7 @@ unsigned long amdgpu_gem_timeout(uint64_t timeout_ns) if (((int64_t)timeout_ns) < 0) return MAX_SCHEDULE_TIMEOUT; - timeout = ktime_sub_ns(ktime_get(), timeout_ns); + timeout = ktime_sub(ns_to_ktime(timeout_ns), ktime_get()); if (ktime_to_ns(timeout) < 0) return 0; -- cgit From 355c822847fa70fa1df6a7451b7ecf76116efcd2 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 8 Jul 2015 12:58:20 -0400 Subject: drm/radeon: disable vce init on cayman (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cayman does not have vce. There were a few places in the shared cayman/TV code where we were trying to do vce stuff. v2: remove -ENOENT check Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/radeon/ni.c | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c index 8e5aeeb058a5..158872eb78e4 100644 --- a/drivers/gpu/drm/radeon/ni.c +++ b/drivers/gpu/drm/radeon/ni.c @@ -2162,18 +2162,20 @@ static int cayman_startup(struct radeon_device *rdev) DRM_ERROR("radeon: failed initializing UVD (%d).\n", r); } - ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; - if (ring->ring_size) - r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0); + if (rdev->family == CHIP_ARUBA) { + ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX]; + if (ring->ring_size) + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0); - ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; - if (ring->ring_size) - r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0); + ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX]; + if (ring->ring_size) + r = radeon_ring_init(rdev, ring, ring->ring_size, 0, 0x0); - if (!r) - r = vce_v1_0_init(rdev); - else if (r != -ENOENT) - DRM_ERROR("radeon: failed initializing VCE (%d).\n", r); + if (!r) + r = vce_v1_0_init(rdev); + if (r) + DRM_ERROR("radeon: failed initializing VCE (%d).\n", r); + } r = radeon_ib_pool_init(rdev); if (r) { @@ -2396,7 +2398,8 @@ void cayman_fini(struct radeon_device *rdev) radeon_irq_kms_fini(rdev); uvd_v1_0_fini(rdev); radeon_uvd_fini(rdev); - radeon_vce_fini(rdev); + if (rdev->family == CHIP_ARUBA) + radeon_vce_fini(rdev); cayman_pcie_gart_fini(rdev); r600_vram_scratch_fini(rdev); radeon_gem_fini(rdev); -- cgit From 757856d2b9568a701df9ea6a4be68effbb9d6f44 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 25 Jun 2015 17:47:45 +0300 Subject: libceph: enable ceph in a non-default network namespace Grab a reference on a network namespace of the 'rbd map' (in case of rbd) or 'mount' (in case of ceph) process and use that to open sockets instead of always using init_net and bailing if network namespace is anything but init_net. Be careful to not share struct ceph_client instances between different namespaces and don't add any code in the !CONFIG_NET_NS case. This is based on a patch from Hong Zhiguo . Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/messenger.h | 3 +++ net/ceph/ceph_common.c | 16 ++++++++++------ net/ceph/messenger.c | 10 +++++++++- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index e15499422fdc..37753278987a 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -56,6 +57,7 @@ struct ceph_messenger { struct ceph_entity_addr my_enc_addr; atomic_t stopping; + possible_net_t net; bool nocrc; bool tcp_nodelay; @@ -267,6 +269,7 @@ extern void ceph_messenger_init(struct ceph_messenger *msgr, u64 required_features, bool nocrc, bool tcp_nodelay); +extern void ceph_messenger_fini(struct ceph_messenger *msgr); extern void ceph_con_init(struct ceph_connection *con, void *private, const struct ceph_connection_operations *ops, diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index cb7db320dd27..f30329f72641 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -16,8 +17,6 @@ #include #include #include -#include -#include #include @@ -131,6 +130,13 @@ int ceph_compare_options(struct ceph_options *new_opt, int i; int ret; + /* + * Don't bother comparing options if network namespaces don't + * match. + */ + if (!net_eq(current->nsproxy->net_ns, read_pnet(&client->msgr.net))) + return -1; + ret = memcmp(opt1, opt2, ofs); if (ret) return ret; @@ -335,9 +341,6 @@ ceph_parse_options(char *options, const char *dev_name, int err = -ENOMEM; substring_t argstr[MAX_OPT_ARGS]; - if (current->nsproxy->net_ns != &init_net) - return ERR_PTR(-EINVAL); - opt = kzalloc(sizeof(*opt), GFP_KERNEL); if (!opt) return ERR_PTR(-ENOMEM); @@ -608,6 +611,7 @@ struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private, fail_monc: ceph_monc_stop(&client->monc); fail: + ceph_messenger_fini(&client->msgr); kfree(client); return ERR_PTR(err); } @@ -621,8 +625,8 @@ void ceph_destroy_client(struct ceph_client *client) /* unmount */ ceph_osdc_stop(&client->osdc); - ceph_monc_stop(&client->monc); + ceph_messenger_fini(&client->msgr); ceph_debugfs_client_cleanup(client); diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 1679f47280e2..5c1f98ea6741 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -479,7 +480,7 @@ static int ceph_tcp_connect(struct ceph_connection *con) int ret; BUG_ON(con->sock); - ret = sock_create_kern(&init_net, con->peer_addr.in_addr.ss_family, + ret = sock_create_kern(read_pnet(&con->msgr->net), paddr->ss_family, SOCK_STREAM, IPPROTO_TCP, &sock); if (ret) return ret; @@ -2944,11 +2945,18 @@ void ceph_messenger_init(struct ceph_messenger *msgr, msgr->tcp_nodelay = tcp_nodelay; atomic_set(&msgr->stopping, 0); + write_pnet(&msgr->net, get_net(current->nsproxy->net_ns)); dout("%s %p\n", __func__, msgr); } EXPORT_SYMBOL(ceph_messenger_init); +void ceph_messenger_fini(struct ceph_messenger *msgr) +{ + put_net(read_pnet(&msgr->net)); +} +EXPORT_SYMBOL(ceph_messenger_fini); + static void clear_standby(struct ceph_connection *con) { /* come back from STANDBY? */ -- cgit From c44bd69c0c8cfadf0239437635b2933efb1f6c4c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 9 Jul 2015 13:57:52 +0300 Subject: libceph: treat sockaddr_storage with uninitialized family as blank addr_is_blank() should return true if family is neither AF_INET nor AF_INET6. This is what its counterpart entity_addr_t::is_blank_ip() is doing and it is the right thing to do: in process_banner() we check if our address is blank and if it is "learn" it from our peer. As it is, we never learn our address and always send out a blank one. This goes way back to ceph.git commit dd732cbfc1c9 ("use sockaddr_storage; and some ipv6 support groundwork") from 2009. While at at, do not open-code ipv6_addr_any() and use INADDR_ANY constant instead of 0. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- net/ceph/messenger.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 5c1f98ea6741..e3be1d22a247 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1732,17 +1732,17 @@ static int verify_hello(struct ceph_connection *con) static bool addr_is_blank(struct sockaddr_storage *ss) { + struct in_addr *addr = &((struct sockaddr_in *)ss)->sin_addr; + struct in6_addr *addr6 = &((struct sockaddr_in6 *)ss)->sin6_addr; + switch (ss->ss_family) { case AF_INET: - return ((struct sockaddr_in *)ss)->sin_addr.s_addr == 0; + return addr->s_addr == htonl(INADDR_ANY); case AF_INET6: - return - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[0] == 0 && - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[1] == 0 && - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[2] == 0 && - ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[3] == 0; + return ipv6_addr_any(addr6); + default: + return true; } - return false; } static int addr_port(struct sockaddr_storage *ss) -- cgit From 398ecff5a562b7b69f77582f98a4b04d0de1f066 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 9 Jul 2015 11:46:14 -0400 Subject: MAINTAINERS: update ceph entries - The Ceph common code is used by both fs/ceph and drivers/block/rbd. Add a separate maintainers entry. - Add Ilya as libceph maintainer and cephfs submaintainer. - Attribute Documentation/ABI/testing/sysfs-bus-rbd to rbd. - ceph-devel@vger.kernel.org should be L, not M in rbd entry. Signed-off-by: Sage Weil Signed-off-by: Ilya Dryomov --- MAINTAINERS | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 8133cefb6b6e..58cbc2118e46 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2562,19 +2562,29 @@ F: arch/powerpc/include/uapi/asm/spu*.h F: arch/powerpc/oprofile/*cell* F: arch/powerpc/platforms/cell/ -CEPH DISTRIBUTED FILE SYSTEM CLIENT +CEPH COMMON CODE (LIBCEPH) +M: Ilya Dryomov M: "Yan, Zheng" M: Sage Weil L: ceph-devel@vger.kernel.org W: http://ceph.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git S: Supported -F: Documentation/filesystems/ceph.txt -F: fs/ceph/ F: net/ceph/ F: include/linux/ceph/ F: include/linux/crush/ +CEPH DISTRIBUTED FILE SYSTEM CLIENT (CEPH) +M: "Yan, Zheng" +M: Sage Weil +M: Ilya Dryomov +L: ceph-devel@vger.kernel.org +W: http://ceph.com/ +T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git +S: Supported +F: Documentation/filesystems/ceph.txt +F: fs/ceph/ + CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM: L: linux-usb@vger.kernel.org S: Orphan @@ -8366,10 +8376,11 @@ RADOS BLOCK DEVICE (RBD) M: Ilya Dryomov M: Sage Weil M: Alex Elder -M: ceph-devel@vger.kernel.org +L: ceph-devel@vger.kernel.org W: http://ceph.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git S: Supported +F: Documentation/ABI/testing/sysfs-bus-rbd F: drivers/block/rbd.c F: drivers/block/rbd_types.h -- cgit From 6e67b7ae2157bdeb6b56381e530fc74bb68b6149 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 9 Jul 2015 11:47:37 -0400 Subject: MAINTAINERS: add secondary tree for ceph modules The Ceph kernel code is primarily developed in the github tree, and only pushed to the korg tree before going to Linus. If Sage is unavailable and another maintainer needs to push something upstream, pull requests may originate from the github tree instead of Sage's korg tree. Signed-off-by: Sage Weil Signed-off-by: Ilya Dryomov --- MAINTAINERS | 3 +++ 1 file changed, 3 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 58cbc2118e46..0d70760e8135 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2569,6 +2569,7 @@ M: Sage Weil L: ceph-devel@vger.kernel.org W: http://ceph.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git +T: git git://github.com/ceph/ceph-client.git S: Supported F: net/ceph/ F: include/linux/ceph/ @@ -2581,6 +2582,7 @@ M: Ilya Dryomov L: ceph-devel@vger.kernel.org W: http://ceph.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git +T: git git://github.com/ceph/ceph-client.git S: Supported F: Documentation/filesystems/ceph.txt F: fs/ceph/ @@ -8379,6 +8381,7 @@ M: Alex Elder L: ceph-devel@vger.kernel.org W: http://ceph.com/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git +T: git git://github.com/ceph/ceph-client.git S: Supported F: Documentation/ABI/testing/sysfs-bus-rbd F: drivers/block/rbd.c -- cgit From 6b7339f4c31ad69c8e9c0b2859276e22cf72176d Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 6 Jul 2015 23:18:37 +0300 Subject: mm: avoid setting up anonymous pages into file mapping Reading page fault handler code I've noticed that under right circumstances kernel would map anonymous pages into file mappings: if the VMA doesn't have vm_ops->fault() and the VMA wasn't fully populated on ->mmap(), kernel would handle page fault to not populated pte with do_anonymous_page(). Let's change page fault handler to use do_anonymous_page() only on anonymous VMA (->vm_ops == NULL) and make sure that the VMA is not shared. For file mappings without vm_ops->fault() or shred VMA without vm_ops, page fault on pte_none() entry would lead to SIGBUS. Signed-off-by: Kirill A. Shutemov Acked-by: Oleg Nesterov Cc: Andrew Morton Cc: Willy Tarreau Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- mm/memory.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index a84fbb772034..388dcf9aa283 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2670,6 +2670,10 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, pte_unmap(page_table); + /* File mapping without ->vm_ops ? */ + if (vma->vm_flags & VM_SHARED) + return VM_FAULT_SIGBUS; + /* Check if we need to add a guard page to the stack */ if (check_stack_guard_page(vma, address) < 0) return VM_FAULT_SIGSEGV; @@ -3099,6 +3103,9 @@ static int do_fault(struct mm_struct *mm, struct vm_area_struct *vma, - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff; pte_unmap(page_table); + /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ + if (!vma->vm_ops->fault) + return VM_FAULT_SIGBUS; if (!(flags & FAULT_FLAG_WRITE)) return do_read_fault(mm, vma, address, pmd, pgoff, flags, orig_pte); @@ -3244,13 +3251,12 @@ static int handle_pte_fault(struct mm_struct *mm, barrier(); if (!pte_present(entry)) { if (pte_none(entry)) { - if (vma->vm_ops) { - if (likely(vma->vm_ops->fault)) - return do_fault(mm, vma, address, pte, - pmd, flags, entry); - } - return do_anonymous_page(mm, vma, address, - pte, pmd, flags); + if (vma->vm_ops) + return do_fault(mm, vma, address, pte, pmd, + flags, entry); + + return do_anonymous_page(mm, vma, address, pte, pmd, + flags); } return do_swap_page(mm, vma, address, pte, pmd, flags, entry); -- cgit From 6f957724b94cb19f5c1c97efd01dd4df8ced323c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 9 Jul 2015 11:20:01 -0700 Subject: Fix firmware loader uevent buffer NULL pointer dereference The firmware class uevent function accessed the "fw_priv->buf" buffer without the proper locking and testing for NULL. This is an old bug (looks like it goes back to 2012 and commit 1244691c73b2: "firmware loader: introduce firmware_buf"), but for some reason it's triggering only now in 4.2-rc1. Shuah Khan is trying to bisect what it is that causes this to trigger more easily, but in the meantime let's just fix the bug since others are hitting it too (at least Ingo reports having seen it as well). Reported-and-tested-by: Shuah Khan Acked-by: Ming Lei Cc: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- drivers/base/firmware_class.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c index 9c4288362a8e..894bda114224 100644 --- a/drivers/base/firmware_class.c +++ b/drivers/base/firmware_class.c @@ -563,10 +563,8 @@ static void fw_dev_release(struct device *dev) kfree(fw_priv); } -static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env) +static int do_firmware_uevent(struct firmware_priv *fw_priv, struct kobj_uevent_env *env) { - struct firmware_priv *fw_priv = to_firmware_priv(dev); - if (add_uevent_var(env, "FIRMWARE=%s", fw_priv->buf->fw_id)) return -ENOMEM; if (add_uevent_var(env, "TIMEOUT=%i", loading_timeout)) @@ -577,6 +575,18 @@ static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env) return 0; } +static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + struct firmware_priv *fw_priv = to_firmware_priv(dev); + int err = 0; + + mutex_lock(&fw_lock); + if (fw_priv->buf) + err = do_firmware_uevent(fw_priv, env); + mutex_unlock(&fw_lock); + return err; +} + static struct class firmware_class = { .name = "firmware", .class_attrs = firmware_class_attrs, -- cgit From 3324603524925c7727207027d1c15e597412d15e Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 9 Jul 2015 14:20:36 -0400 Subject: selinux: don't waste ebitmap space when importing NetLabel categories At present we don't create efficient ebitmaps when importing NetLabel category bitmaps. This can present a problem when comparing ebitmaps since ebitmap_cmp() is very strict about these things and considers these wasteful ebitmaps not equal when compared to their more efficient counterparts, even if their values are the same. This isn't likely to cause problems on 64-bit systems due to a bit of luck on how NetLabel/CIPSO works and the default ebitmap size, but it can be a problem on 32-bit systems. This patch fixes this problem by being a bit more intelligent when importing NetLabel category bitmaps by skipping over empty sections which should result in a nice, efficient ebitmap. Cc: stable@vger.kernel.org # 3.17 Signed-off-by: Paul Moore --- security/selinux/ss/ebitmap.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/security/selinux/ss/ebitmap.c b/security/selinux/ss/ebitmap.c index afe6a269ec17..57644b1dc42e 100644 --- a/security/selinux/ss/ebitmap.c +++ b/security/selinux/ss/ebitmap.c @@ -153,6 +153,12 @@ int ebitmap_netlbl_import(struct ebitmap *ebmap, if (offset == (u32)-1) return 0; + /* don't waste ebitmap space if the netlabel bitmap is empty */ + if (bitmap == 0) { + offset += EBITMAP_UNIT_SIZE; + continue; + } + if (e_iter == NULL || offset >= e_iter->startbit + EBITMAP_SIZE) { e_prev = e_iter; -- cgit From 9abea2d64ce93b6909de7f83a7f681f572369708 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 9 Jul 2015 18:05:15 +0200 Subject: ioctl_compat: handle FITRIM The FITRIM ioctl has the same arguments on 32-bit and 64-bit architectures, so we can add it to the list of compatible ioctls and drop it from compat_ioctl method of various filesystems. Signed-off-by: Mikulas Patocka Cc: Al Viro Cc: Ted Ts'o Signed-off-by: Linus Torvalds --- fs/compat_ioctl.c | 1 + fs/ecryptfs/file.c | 1 - fs/ext4/ioctl.c | 1 - fs/jfs/ioctl.c | 3 --- fs/nilfs2/ioctl.c | 1 - fs/ocfs2/ioctl.c | 1 - 6 files changed, 1 insertion(+), 7 deletions(-) diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 6b8e2f091f5b..48851f6ea6ec 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -896,6 +896,7 @@ COMPATIBLE_IOCTL(FIGETBSZ) /* 'X' - originally XFS but some now in the VFS */ COMPATIBLE_IOCTL(FIFREEZE) COMPATIBLE_IOCTL(FITHAW) +COMPATIBLE_IOCTL(FITRIM) COMPATIBLE_IOCTL(KDGETKEYCODE) COMPATIBLE_IOCTL(KDSETKEYCODE) COMPATIBLE_IOCTL(KDGKBTYPE) diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 72afcc629d7b..feef8a9c4de7 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -325,7 +325,6 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return rc; switch (cmd) { - case FITRIM: case FS_IOC32_GETFLAGS: case FS_IOC32_SETFLAGS: case FS_IOC32_GETVERSION: diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index cb8451246b30..1346cfa355d0 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -755,7 +755,6 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) return err; } case EXT4_IOC_MOVE_EXT: - case FITRIM: case EXT4_IOC_RESIZE_FS: case EXT4_IOC_PRECACHE_EXTENTS: case EXT4_IOC_SET_ENCRYPTION_POLICY: diff --git a/fs/jfs/ioctl.c b/fs/jfs/ioctl.c index 93a1232894f6..8db8b7d61e40 100644 --- a/fs/jfs/ioctl.c +++ b/fs/jfs/ioctl.c @@ -180,9 +180,6 @@ long jfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case JFS_IOC_SETFLAGS32: cmd = JFS_IOC_SETFLAGS; break; - case FITRIM: - cmd = FITRIM; - break; } return jfs_ioctl(filp, cmd, arg); } diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 9a20e513d7eb..aba43811d6ef 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -1369,7 +1369,6 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) case NILFS_IOCTL_SYNC: case NILFS_IOCTL_RESIZE: case NILFS_IOCTL_SET_ALLOC_RANGE: - case FITRIM: break; default: return -ENOIOCTLCMD; diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 53e6c40ed4c6..3cb097ccce60 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c @@ -980,7 +980,6 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) case OCFS2_IOC_GROUP_EXTEND: case OCFS2_IOC_GROUP_ADD: case OCFS2_IOC_GROUP_ADD64: - case FITRIM: break; case OCFS2_IOC_REFLINK: if (copy_from_user(&args, argp, sizeof(args))) -- cgit From fc0a1f035cf15f704f4092da294963c57e94c1c0 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 26 Jun 2015 14:12:10 +0200 Subject: i2c: I2C_MT65XX should depend on HAS_DMA If NO_DMA=y: ERROR: "dma_unmap_single" [drivers/i2c/busses/i2c-mt65xx.ko] undefined! ERROR: "dma_mapping_error" [drivers/i2c/busses/i2c-mt65xx.ko] undefined! ERROR: "dma_map_single" [drivers/i2c/busses/i2c-mt65xx.ko] undefined! Add a dependency on HAS_DMA to fix this. Signed-off-by: Geert Uytterhoeven Signed-off-by: Wolfram Sang --- drivers/i2c/busses/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index 35ac23768ce9..577d58d1f1a1 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -633,6 +633,7 @@ config I2C_MPC config I2C_MT65XX tristate "MediaTek I2C adapter" depends on ARCH_MEDIATEK || COMPILE_TEST + depends on HAS_DMA help This selects the MediaTek(R) Integrated Inter Circuit bus driver for MT65xx and MT81xx. -- cgit From 724948106ed236fc528c720ae12c79af7e2aea4e Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 30 Jun 2015 11:08:46 +0800 Subject: i2c: xgene-slimpro: Fix missing mbox_free_channel call in probe error path Free requested mailbox channel before return error. Signed-off-by: Axel Lin Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-xgene-slimpro.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/i2c/busses/i2c-xgene-slimpro.c b/drivers/i2c/busses/i2c-xgene-slimpro.c index dcca7076231e..1c9cb65ac4cf 100644 --- a/drivers/i2c/busses/i2c-xgene-slimpro.c +++ b/drivers/i2c/busses/i2c-xgene-slimpro.c @@ -419,6 +419,7 @@ static int xgene_slimpro_i2c_probe(struct platform_device *pdev) rc = i2c_add_adapter(adapter); if (rc) { dev_err(&pdev->dev, "Adapter registeration failed\n"); + mbox_free_channel(ctx->mbox_chan); return rc; } -- cgit From eb8173e3d7b94664ecd3acf34942c9c9d6f6cb73 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Tue, 30 Jun 2015 11:41:58 +0800 Subject: i2c: jz4780: Fix return value if probe fails Current code returns 0 if fails to read clock-frequency DT property, fix it. Also add checking return value of clk_prepare_enable and propagate return value of devm_request_irq. Signed-off-by: Axel Lin Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-jz4780.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/i2c/busses/i2c-jz4780.c b/drivers/i2c/busses/i2c-jz4780.c index 19b2d689a5ef..f325663c27c5 100644 --- a/drivers/i2c/busses/i2c-jz4780.c +++ b/drivers/i2c/busses/i2c-jz4780.c @@ -764,12 +764,15 @@ static int jz4780_i2c_probe(struct platform_device *pdev) if (IS_ERR(i2c->clk)) return PTR_ERR(i2c->clk); - clk_prepare_enable(i2c->clk); + ret = clk_prepare_enable(i2c->clk); + if (ret) + return ret; - if (of_property_read_u32(pdev->dev.of_node, "clock-frequency", - &clk_freq)) { + ret = of_property_read_u32(pdev->dev.of_node, "clock-frequency", + &clk_freq); + if (ret) { dev_err(&pdev->dev, "clock-frequency not specified in DT"); - return clk_freq; + goto err; } i2c->speed = clk_freq / 1000; @@ -790,10 +793,8 @@ static int jz4780_i2c_probe(struct platform_device *pdev) i2c->irq = platform_get_irq(pdev, 0); ret = devm_request_irq(&pdev->dev, i2c->irq, jz4780_i2c_irq, 0, dev_name(&pdev->dev), i2c); - if (ret) { - ret = -ENODEV; + if (ret) goto err; - } ret = i2c_add_adapter(&i2c->adap); if (ret < 0) { -- cgit From 4f001fd30145a6a8f72f9544c982cfd3dcb7c6df Mon Sep 17 00:00:00 2001 From: Pantelis Antoniou Date: Sat, 24 Jan 2015 09:16:29 +0200 Subject: i2c: Mark instantiated device nodes with OF_POPULATE Mark (and unmark) device nodes with the POPULATE flag as appropriate. This is required to avoid multi probing when using I2C and device overlays containing a mux. This patch is also more careful with the release of the adapter device which caused a deadlock with muxes, and does not break the build on !OF since the node flag accessors are not defined then. Signed-off-by: Pantelis Antoniou Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c index 069a41f116dd..e6d4935161e4 100644 --- a/drivers/i2c/i2c-core.c +++ b/drivers/i2c/i2c-core.c @@ -1012,6 +1012,8 @@ EXPORT_SYMBOL_GPL(i2c_new_device); */ void i2c_unregister_device(struct i2c_client *client) { + if (client->dev.of_node) + of_node_clear_flag(client->dev.of_node, OF_POPULATED); device_unregister(&client->dev); } EXPORT_SYMBOL_GPL(i2c_unregister_device); @@ -1320,8 +1322,11 @@ static void of_i2c_register_devices(struct i2c_adapter *adap) dev_dbg(&adap->dev, "of_i2c: walking child nodes\n"); - for_each_available_child_of_node(adap->dev.of_node, node) + for_each_available_child_of_node(adap->dev.of_node, node) { + if (of_node_test_and_set_flag(node, OF_POPULATED)) + continue; of_i2c_register_device(adap, node); + } } static int of_dev_node_match(struct device *dev, void *data) @@ -1853,6 +1858,11 @@ static int of_i2c_notify(struct notifier_block *nb, unsigned long action, if (adap == NULL) return NOTIFY_OK; /* not for us */ + if (of_node_test_and_set_flag(rd->dn, OF_POPULATED)) { + put_device(&adap->dev); + return NOTIFY_OK; + } + client = of_i2c_register_device(adap, rd->dn); put_device(&adap->dev); @@ -1863,6 +1873,10 @@ static int of_i2c_notify(struct notifier_block *nb, unsigned long action, } break; case OF_RECONFIG_CHANGE_REMOVE: + /* already depopulated? */ + if (!of_node_check_flag(rd->dn, OF_POPULATED)) + return NOTIFY_OK; + /* find our device by node */ client = of_find_i2c_device_by_node(rd->dn); if (client == NULL) -- cgit From a27b5b97d6fe91f55058ad8ac28a8768700201ab Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Sun, 28 Jun 2015 15:16:57 +0200 Subject: hpfs: add fstrim support This patch adds support for fstrim to the HPFS filesystem. Signed-off-by: Mikulas Patocka Signed-off-by: Linus Torvalds --- fs/hpfs/alloc.c | 95 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/hpfs/dir.c | 1 + fs/hpfs/file.c | 1 + fs/hpfs/hpfs_fn.h | 4 +++ fs/hpfs/super.c | 27 ++++++++++++++++ 5 files changed, 128 insertions(+) diff --git a/fs/hpfs/alloc.c b/fs/hpfs/alloc.c index f005046e1591..d6a4b55d2ab0 100644 --- a/fs/hpfs/alloc.c +++ b/fs/hpfs/alloc.c @@ -484,3 +484,98 @@ struct anode *hpfs_alloc_anode(struct super_block *s, secno near, anode_secno *a a->btree.first_free = cpu_to_le16(8); return a; } + +static unsigned find_run(__le32 *bmp, unsigned *idx) +{ + unsigned len; + while (tstbits(bmp, *idx, 1)) { + (*idx)++; + if (unlikely(*idx >= 0x4000)) + return 0; + } + len = 1; + while (!tstbits(bmp, *idx + len, 1)) + len++; + return len; +} + +static int do_trim(struct super_block *s, secno start, unsigned len, secno limit_start, secno limit_end, unsigned minlen, unsigned *result) +{ + int err; + secno end; + if (fatal_signal_pending(current)) + return -EINTR; + end = start + len; + if (start < limit_start) + start = limit_start; + if (end > limit_end) + end = limit_end; + if (start >= end) + return 0; + if (end - start < minlen) + return 0; + err = sb_issue_discard(s, start, end - start, GFP_NOFS, 0); + if (err) + return err; + *result += end - start; + return 0; +} + +int hpfs_trim_fs(struct super_block *s, u64 start, u64 end, u64 minlen, unsigned *result) +{ + int err = 0; + struct hpfs_sb_info *sbi = hpfs_sb(s); + unsigned idx, len, start_bmp, end_bmp; + __le32 *bmp; + struct quad_buffer_head qbh; + + *result = 0; + if (!end || end > sbi->sb_fs_size) + end = sbi->sb_fs_size; + if (start >= sbi->sb_fs_size) + return 0; + if (minlen > 0x4000) + return 0; + if (start < sbi->sb_dirband_start + sbi->sb_dirband_size && end > sbi->sb_dirband_start) { + hpfs_lock(s); + if (s->s_flags & MS_RDONLY) { + err = -EROFS; + goto unlock_1; + } + if (!(bmp = hpfs_map_dnode_bitmap(s, &qbh))) { + err = -EIO; + goto unlock_1; + } + idx = 0; + while ((len = find_run(bmp, &idx)) && !err) { + err = do_trim(s, sbi->sb_dirband_start + idx * 4, len * 4, start, end, minlen, result); + idx += len; + } + hpfs_brelse4(&qbh); +unlock_1: + hpfs_unlock(s); + } + start_bmp = start >> 14; + end_bmp = (end + 0x3fff) >> 14; + while (start_bmp < end_bmp && !err) { + hpfs_lock(s); + if (s->s_flags & MS_RDONLY) { + err = -EROFS; + goto unlock_2; + } + if (!(bmp = hpfs_map_bitmap(s, start_bmp, &qbh, "trim"))) { + err = -EIO; + goto unlock_2; + } + idx = 0; + while ((len = find_run(bmp, &idx)) && !err) { + err = do_trim(s, (start_bmp << 14) + idx, len, start, end, minlen, result); + idx += len; + } + hpfs_brelse4(&qbh); +unlock_2: + hpfs_unlock(s); + start_bmp++; + } + return err; +} diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 2a8e07425de0..dc540bfcee1d 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c @@ -327,4 +327,5 @@ const struct file_operations hpfs_dir_ops = .iterate = hpfs_readdir, .release = hpfs_dir_release, .fsync = hpfs_file_fsync, + .unlocked_ioctl = hpfs_ioctl, }; diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 6d8cfe9b52d6..7ca28d604bf7 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -203,6 +203,7 @@ const struct file_operations hpfs_file_ops = .release = hpfs_file_release, .fsync = hpfs_file_fsync, .splice_read = generic_file_splice_read, + .unlocked_ioctl = hpfs_ioctl, }; const struct inode_operations hpfs_file_iops = diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index bb04b58d1d69..c4867b5116dd 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include #include "hpfs.h" @@ -200,6 +202,7 @@ void hpfs_free_dnode(struct super_block *, secno); struct dnode *hpfs_alloc_dnode(struct super_block *, secno, dnode_secno *, struct quad_buffer_head *); struct fnode *hpfs_alloc_fnode(struct super_block *, secno, fnode_secno *, struct buffer_head **); struct anode *hpfs_alloc_anode(struct super_block *, secno, anode_secno *, struct buffer_head **); +int hpfs_trim_fs(struct super_block *, u64, u64, u64, unsigned *); /* anode.c */ @@ -318,6 +321,7 @@ __printf(2, 3) void hpfs_error(struct super_block *, const char *, ...); int hpfs_stop_cycles(struct super_block *, int, int *, int *, char *); unsigned hpfs_get_free_dnodes(struct super_block *); +long hpfs_ioctl(struct file *file, unsigned cmd, unsigned long arg); /* * local time (HPFS) to GMT (Unix) diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 7cd00d3a7c9b..037e3e597ff4 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -196,6 +196,33 @@ static int hpfs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } + +long hpfs_ioctl(struct file *file, unsigned cmd, unsigned long arg) +{ + switch (cmd) { + case FITRIM: { + struct fstrim_range range; + secno n_trimmed; + int r; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) + return -EFAULT; + r = hpfs_trim_fs(file_inode(file)->i_sb, range.start >> 9, (range.start + range.len) >> 9, (range.minlen + 511) >> 9, &n_trimmed); + if (r) + return r; + range.len = (u64)n_trimmed << 9; + if (copy_to_user((struct fstrim_range __user *)arg, &range, sizeof(range))) + return -EFAULT; + return 0; + } + default: { + return -ENOIOCTLCMD; + } + } +} + + static struct kmem_cache * hpfs_inode_cachep; static struct inode *hpfs_alloc_inode(struct super_block *sb) -- cgit From d7b04097c250e6322ba73d3b03337074afeeb314 Mon Sep 17 00:00:00 2001 From: Firo Yang Date: Thu, 23 Apr 2015 17:28:45 +0800 Subject: hpfs: Remove unessary cast Avoid a pointless kmem_cache_alloc() return value cast in fs/hpfs/super.c::hpfs_alloc_inode() Signed-off-by: Firo Yang Signed-off-by: Mikulas Patocka Signed-off-by: Linus Torvalds --- fs/hpfs/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 037e3e597ff4..0642516d36c8 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -228,7 +228,7 @@ static struct kmem_cache * hpfs_inode_cachep; static struct inode *hpfs_alloc_inode(struct super_block *sb) { struct hpfs_inode_info *ei; - ei = (struct hpfs_inode_info *)kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS); + ei = kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS); if (!ei) return NULL; ei->vfs_inode.i_version = 1; -- cgit From ce657611baf902f14ae559ce4e0787ead6712067 Mon Sep 17 00:00:00 2001 From: Sanidhya Kashyap Date: Sat, 21 Mar 2015 12:57:50 -0400 Subject: hpfs: kstrdup() out of memory handling There is a possibility of nothing being allocated to the new_opts in case of memory pressure, therefore return ENOMEM for such case. Signed-off-by: Sanidhya Kashyap Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- fs/hpfs/super.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 0642516d36c8..cde044d41f69 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -451,11 +451,14 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) int o; struct hpfs_sb_info *sbi = hpfs_sb(s); char *new_opts = kstrdup(data, GFP_KERNEL); - + + if (!new_opts) + return -ENOMEM; + sync_filesystem(s); *flags |= MS_NOATIME; - + hpfs_lock(s); uid = sbi->sb_uid; gid = sbi->sb_gid; umask = 0777 & ~sbi->sb_mode; -- cgit From a28e4b2b18ccb90df402da3f21e1a83c9d4f8ec1 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 26 Mar 2015 20:47:10 -0700 Subject: hpfs: hpfs_error: Remove static buffer, use vsprintf extension %pV instead Removing unnecessary static buffers is good. Use the vsprintf %pV extension instead. Signed-off-by: Joe Perches Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org # v2.6.36+ Signed-off-by: Linus Torvalds --- fs/hpfs/super.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index cde044d41f69..68a9bed05628 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -52,17 +52,20 @@ static void unmark_dirty(struct super_block *s) } /* Filesystem error... */ -static char err_buf[1024]; - void hpfs_error(struct super_block *s, const char *fmt, ...) { + struct va_format vaf; va_list args; va_start(args, fmt); - vsnprintf(err_buf, sizeof(err_buf), fmt, args); + + vaf.fmt = fmt; + vaf.va = &args; + + pr_err("filesystem error: %pV", &vaf); + va_end(args); - pr_err("filesystem error: %s", err_buf); if (!hpfs_sb(s)->sb_was_error) { if (hpfs_sb(s)->sb_err == 2) { pr_cont("; crashing the system because you wanted it\n"); -- cgit From 2c069a118fe1d80c47dca84e1561045fc7f3cc9e Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Fri, 10 Jul 2015 09:04:25 +1000 Subject: cxl: Check if afu is not null in cxl_slbia The pointer to an AFU in the adapter's list of AFUs can be null if we're in the process of removing AFUs. The afu_list_lock doesn't guard against this. Say we have 2 slices, and we're in the process of removing cxl. - We remove the AFUs in order (see cxl_remove). In cxl_remove_afu for AFU 0, we take the lock, set adapter->afu[0] = NULL, and release the lock. - Then we get an slbia. In cxl_slbia we take the lock, and set afu = adapter->afu[0], which is NULL. - Therefore our attempt to check afu->enabled will blow up. Therefore, check if afu is a null pointer before dereferencing it. Cc: stable@vger.kernel.org Signed-off-by: Daniel Axtens Acked-by: Michael Neuling Acked-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/misc/cxl/main.c b/drivers/misc/cxl/main.c index 833348e2c9cb..4a164ab8b35a 100644 --- a/drivers/misc/cxl/main.c +++ b/drivers/misc/cxl/main.c @@ -73,7 +73,7 @@ static inline void cxl_slbia_core(struct mm_struct *mm) spin_lock(&adapter->afu_list_lock); for (slice = 0; slice < adapter->slices; slice++) { afu = adapter->afu[slice]; - if (!afu->enabled) + if (!afu || !afu->enabled) continue; rcu_read_lock(); idr_for_each_entry(&afu->contexts_idr, ctx, id) -- cgit From cccf34e9411c41b0cbfb41980fe55fc8e7c98fd2 Mon Sep 17 00:00:00 2001 From: Markos Chandras Date: Fri, 10 Jul 2015 09:29:10 +0100 Subject: MIPS: c-r4k: Fix cache flushing for MT cores MT_SMP is not the only SMP option for MT cores. The MT_SMP option allows more than one VPE per core to appear as a secondary CPU in the system. Because of how CM works, it propagates the address-based cache ops to the secondary cores but not the index-based ones. Because of that, the code does not use IPIs to flush the L1 caches on secondary cores because the CM would have done that already. However, the CM functionality is independent of the type of SMP kernel so even in non-MT kernels, IPIs are not necessary. As a result of which, we change the conditional to depend on the CM presence. Moreover, since VPEs on the same core share the same L1 caches, there is no need to send an IPI on all of them so we calculate a suitable cpumask with only one VPE per core. Signed-off-by: Markos Chandras Cc: # 3.15+ Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10654/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/smp.h | 1 + arch/mips/kernel/smp.c | 44 +++++++++++++++++++++++++++++++++++++++++++- arch/mips/mm/c-r4k.c | 14 +++++++++++--- 3 files changed, 55 insertions(+), 4 deletions(-) diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index 2b25d1ba1ea0..16f1ea9ab191 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -23,6 +23,7 @@ extern int smp_num_siblings; extern cpumask_t cpu_sibling_map[]; extern cpumask_t cpu_core_map[]; +extern cpumask_t cpu_foreign_map; #define raw_smp_processor_id() (current_thread_info()->cpu) diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index faa46ebd9dda..d0744cc77ea7 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -63,6 +63,13 @@ EXPORT_SYMBOL(cpu_sibling_map); cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); +/* + * A logcal cpu mask containing only one VPE per core to + * reduce the number of IPIs on large MT systems. + */ +cpumask_t cpu_foreign_map __read_mostly; +EXPORT_SYMBOL(cpu_foreign_map); + /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -103,6 +110,29 @@ static inline void set_cpu_core_map(int cpu) } } +/* + * Calculate a new cpu_foreign_map mask whenever a + * new cpu appears or disappears. + */ +static inline void calculate_cpu_foreign_map(void) +{ + int i, k, core_present; + cpumask_t temp_foreign_map; + + /* Re-calculate the mask */ + for_each_online_cpu(i) { + core_present = 0; + for_each_cpu(k, &temp_foreign_map) + if (cpu_data[i].package == cpu_data[k].package && + cpu_data[i].core == cpu_data[k].core) + core_present = 1; + if (!core_present) + cpumask_set_cpu(i, &temp_foreign_map); + } + + cpumask_copy(&cpu_foreign_map, &temp_foreign_map); +} + struct plat_smp_ops *mp_ops; EXPORT_SYMBOL(mp_ops); @@ -146,6 +176,8 @@ asmlinkage void start_secondary(void) set_cpu_sibling_map(cpu); set_cpu_core_map(cpu); + calculate_cpu_foreign_map(); + cpumask_set_cpu(cpu, &cpu_callin_map); synchronise_count_slave(cpu); @@ -173,9 +205,18 @@ void __irq_entry smp_call_function_interrupt(void) static void stop_this_cpu(void *dummy) { /* - * Remove this CPU: + * Remove this CPU. Be a bit slow here and + * set the bits for every online CPU so we don't miss + * any IPI whilst taking this VPE down. */ + + cpumask_copy(&cpu_foreign_map, cpu_online_mask); + + /* Make it visible to every other CPU */ + smp_mb(); + set_cpu_online(smp_processor_id(), false); + calculate_cpu_foreign_map(); local_irq_disable(); while (1); } @@ -197,6 +238,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) mp_ops->prepare_cpus(max_cpus); set_cpu_sibling_map(0); set_cpu_core_map(0); + calculate_cpu_foreign_map(); #ifndef CONFIG_HOTPLUG_CPU init_cpu_present(cpu_possible_mask); #endif diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 7f660dc67596..a5974ddba44b 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -37,6 +37,7 @@ #include /* for run_uncached() */ #include #include +#include /* * Special Variant of smp_call_function for use by cache functions: @@ -51,9 +52,16 @@ static inline void r4k_on_each_cpu(void (*func) (void *info), void *info) { preempt_disable(); -#ifndef CONFIG_MIPS_MT_SMP - smp_call_function(func, info, 1); -#endif + /* + * The Coherent Manager propagates address-based cache ops to other + * cores but not index-based ops. However, r4k_on_each_cpu is used + * in both cases so there is no easy way to tell what kind of op is + * executed to the other cores. The best we can probably do is + * to restrict that call when a CM is not present because both + * CM-based SMP protocols (CMP & CPS) restrict index-based cache ops. + */ + if (!mips_cm_present()) + smp_call_function_many(&cpu_foreign_map, func, info, 1); func(info); preempt_enable(); } -- cgit From 6249ecbbb75cd635025cc681fcf51fb8659edbab Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 17 Apr 2015 10:44:15 +0100 Subject: MIPS: Malta: Make GIC FDC IRQ workaround Malta specific Wider testing reveals that the Fast Debug Channel (FDC) interrupt is routed through the GIC just fine on Pistachio SoC, even though it contains interAptiv cores. Clearly the FDC interrupt routing problems previously observed on interAptiv and proAptiv cores are specific to the Malta FPGA bitstreams. Move the workaround for interAptiv and proAptiv out of gic_get_c0_fdc_int() in the GIC irqchip driver into Malta's get_c0_fdc_int() platform callback, to allow the Pistachio SoC to use the FDC interrupt. Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Andrew Bresticker Cc: Thomas Gleixner Cc: Jason Cooper Cc: linux-mips@linux-mips.org Reviewed-by: Andrew Bresticker Cc: James Hartley Patchwork: http://patchwork.linux-mips.org/patch/9748/ Signed-off-by: Ralf Baechle --- arch/mips/mti-malta/malta-time.c | 20 +++++++++++++------- drivers/irqchip/irq-mips-gic.c | 10 ---------- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/arch/mips/mti-malta/malta-time.c b/arch/mips/mti-malta/malta-time.c index 185e68261f45..5625b190edc0 100644 --- a/arch/mips/mti-malta/malta-time.c +++ b/arch/mips/mti-malta/malta-time.c @@ -119,18 +119,24 @@ void read_persistent_clock(struct timespec *ts) int get_c0_fdc_int(void) { - int mips_cpu_fdc_irq; + /* + * Some cores claim the FDC is routable through the GIC, but it doesn't + * actually seem to be connected for those Malta bitstreams. + */ + switch (current_cpu_type()) { + case CPU_INTERAPTIV: + case CPU_PROAPTIV: + return -1; + }; if (cpu_has_veic) - mips_cpu_fdc_irq = -1; + return -1; else if (gic_present) - mips_cpu_fdc_irq = gic_get_c0_fdc_int(); + return gic_get_c0_fdc_int(); else if (cp0_fdc_irq >= 0) - mips_cpu_fdc_irq = MIPS_CPU_IRQ_BASE + cp0_fdc_irq; + return MIPS_CPU_IRQ_BASE + cp0_fdc_irq; else - mips_cpu_fdc_irq = -1; - - return mips_cpu_fdc_irq; + return -1; } int get_c0_perfcount_int(void) diff --git a/drivers/irqchip/irq-mips-gic.c b/drivers/irqchip/irq-mips-gic.c index 4400edd1a6c7..b7d54d428b5e 100644 --- a/drivers/irqchip/irq-mips-gic.c +++ b/drivers/irqchip/irq-mips-gic.c @@ -257,16 +257,6 @@ int gic_get_c0_fdc_int(void) return MIPS_CPU_IRQ_BASE + cp0_fdc_irq; } - /* - * Some cores claim the FDC is routable but it doesn't actually seem to - * be connected. - */ - switch (current_cpu_type()) { - case CPU_INTERAPTIV: - case CPU_PROAPTIV: - return -1; - } - return irq_create_mapping(gic_irq_domain, GIC_LOCAL_TO_HWIRQ(GIC_LOCAL_INT_FDC)); } -- cgit From 6b5e741e9a834a8cf2d5b895319045ab17ad37fe Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 17 Apr 2015 10:44:16 +0100 Subject: MIPS: Pistachio: Support CDMM & Fast Debug Channel Implement the mips_cdmm_phys_base() platform callback to provide a default Common Device Memory Map (CDMM) physical base address for the Pistachio SoC. This allows the CDMM in each VPE to be configured and probed for devices, such as the Fast Debug Channel (FDC). The physical address chosen is just below the default CPC address, which appears to also be unallocated. The FDC IRQ is also usable on Pistachio, and is routed through the GIC, so implement the get_c0_fdc_int() platform callback using gic_get_c0_fdc_int(), so the FDC driver doesn't have to fall back to polling. Signed-off-by: James Hogan Cc: Ralf Baechle Cc: Andrew Bresticker Cc: James Hartley Cc: linux-mips@linux-mips.org Reviewed-by: Andrew Bresticker Patchwork: http://patchwork.linux-mips.org/patch/9749/ Signed-off-by: Ralf Baechle --- arch/mips/pistachio/init.c | 8 +++++++- arch/mips/pistachio/time.c | 5 +++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/arch/mips/pistachio/init.c b/arch/mips/pistachio/init.c index d2dc836523a3..8bd8ebb20a72 100644 --- a/arch/mips/pistachio/init.c +++ b/arch/mips/pistachio/init.c @@ -63,13 +63,19 @@ void __init plat_mem_setup(void) plat_setup_iocoherency(); } -#define DEFAULT_CPC_BASE_ADDR 0x1bde0000 +#define DEFAULT_CPC_BASE_ADDR 0x1bde0000 +#define DEFAULT_CDMM_BASE_ADDR 0x1bdd0000 phys_addr_t mips_cpc_default_phys_base(void) { return DEFAULT_CPC_BASE_ADDR; } +phys_addr_t mips_cdmm_phys_base(void) +{ + return DEFAULT_CDMM_BASE_ADDR; +} + static void __init mips_nmi_setup(void) { void *base; diff --git a/arch/mips/pistachio/time.c b/arch/mips/pistachio/time.c index 67889fcea8aa..7c73fcb92a10 100644 --- a/arch/mips/pistachio/time.c +++ b/arch/mips/pistachio/time.c @@ -27,6 +27,11 @@ int get_c0_perfcount_int(void) return gic_get_c0_perfcount_int(); } +int get_c0_fdc_int(void) +{ + return gic_get_c0_fdc_int(); +} + void __init plat_time_init(void) { struct device_node *np; -- cgit From 1e18ac7aeaec357048172695b1fbb461205b166f Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Thu, 9 Jul 2015 10:40:41 +0100 Subject: MIPS: c-r4k: Extend way_string array The L2 cache in the I6400 core has 16 ways, so extend the way_string array to take such caches into account. [ralf@linux-mips.org: Other already supported CPUs are free to support more than 8 ways of cache as well.] Signed-off-by: Paul Burton Signed-off-by: Markos Chandras Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/10640/ Signed-off-by: Ralf Baechle --- arch/mips/mm/c-r4k.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index a5974ddba44b..fbea4432f3f2 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -945,7 +945,9 @@ static void b5k_instruction_hazard(void) } static char *way_string[] = { NULL, "direct mapped", "2-way", - "3-way", "4-way", "5-way", "6-way", "7-way", "8-way" + "3-way", "4-way", "5-way", "6-way", "7-way", "8-way", + "9-way", "10-way", "11-way", "12-way", + "13-way", "14-way", "15-way", "16-way", }; static void probe_pcache(void) -- cgit From 51d53674c3fff11e9231e66ca8616c65deadfb6c Mon Sep 17 00:00:00 2001 From: Ralf Baechle Date: Thu, 9 Jul 2015 18:02:51 +0200 Subject: MIPS: O32: Use compat_sys_getsockopt. We were using the native syscall and that results in subtle breakage. This is the same issue as fixed in 077d0e65618f27b2199d622e12ada6d8f3dbd862 (MIPS: N32: Use compat getsockopt syscall) but that commit did fix it only for N32. Signed-off-by: Ralf Baechle Link: https://bugzilla.kernel.org/show_bug.cgi?id=100291 --- arch/mips/kernel/scall64-o32.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 66d618bb2fa2..f543ff4feef9 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -400,7 +400,7 @@ EXPORT(sys32_call_table) PTR sys_connect /* 4170 */ PTR sys_getpeername PTR sys_getsockname - PTR sys_getsockopt + PTR compat_sys_getsockopt PTR sys_listen PTR compat_sys_recv /* 4175 */ PTR compat_sys_recvfrom -- cgit From eeedcea69e927857d32aaf089725eddd2c79dd0a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 26 Jun 2015 08:09:29 +0100 Subject: ARM: 8395/1: l2c: Add support for the "arm,shared-override" property "CoreLink Level 2 Cache Controller L2C-310", p. 2-15, section 2.3.2 Shareable attribute" states: "The default behavior of the cache controller with respect to the shareable attribute is to transform Normal Memory Non-cacheable transactions into: - cacheable no allocate for reads - write through no write allocate for writes." Depending on the system architecture, this may cause memory corruption in the presence of bus mastering devices (e.g. OHCI). To avoid such corruption, the default behavior can be disabled by setting the Shared Override bit in the Auxiliary Control register. Currently the Shared Override bit can be set only using C code: - by calling l2x0_init() directly, which is deprecated, - by setting/clearing the bit in the machine_desc.l2c_aux_val/mask fields, but using values differing from 0/~0 is also deprecated. Hence add support for an "arm,shared-override" device tree property for the l2c device node. By specifying this property, affected systems can indicate that non-cacheable transactions must not be transformed. Then, it's up to the OS to decide. The current behavior is to set the "shared attribute override enable" bit, as there may exist kernel linear mappings and cacheable aliases for the DMA buffers, even if CMA is enabled. See also commit 1a8e41cd672f894b ("ARM: 6395/1: VExpress: Set bit 22 in the PL310 (cache controller) AuxCtlr register"): "Clearing bit 22 in the PL310 Auxiliary Control register (shared attribute override enable) has the side effect of transforming Normal Shared Non-cacheable reads into Cacheable no-allocate reads. Coherent DMA buffers in Linux always have a Cacheable alias via the kernel linear mapping and the processor can speculatively load cache lines into the PL310 controller. With bit 22 cleared, Non-cacheable reads would unexpectedly hit such cache lines leading to buffer corruption." Signed-off-by: Geert Uytterhoeven Acked-by: Catalin Marinas Signed-off-by: Russell King --- Documentation/devicetree/bindings/arm/l2cc.txt | 6 ++++++ arch/arm/mm/cache-l2x0.c | 5 +++++ 2 files changed, 11 insertions(+) diff --git a/Documentation/devicetree/bindings/arm/l2cc.txt b/Documentation/devicetree/bindings/arm/l2cc.txt index 2251dccb141e..06c88a4d28ac 100644 --- a/Documentation/devicetree/bindings/arm/l2cc.txt +++ b/Documentation/devicetree/bindings/arm/l2cc.txt @@ -67,6 +67,12 @@ Optional properties: disable if zero. - arm,prefetch-offset : Override prefetch offset value. Valid values are 0-7, 15, 23, and 31. +- arm,shared-override : The default behavior of the pl310 cache controller with + respect to the shareable attribute is to transform "normal memory + non-cacheable transactions" into "cacheable no allocate" (for reads) or + "write through no write allocate" (for writes). + On systems where this may cause DMA buffer corruption, this property must be + specified to indicate that such transforms are precluded. - prefetch-data : Data prefetch. Value: <0> (forcibly disable), <1> (forcibly enable), property absent (retain settings set by firmware) - prefetch-instr : Instruction prefetch. Value: <0> (forcibly disable), diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index 71b3d3309024..493692d838c6 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -1171,6 +1171,11 @@ static void __init l2c310_of_parse(const struct device_node *np, } } + if (of_property_read_bool(np, "arm,shared-override")) { + *aux_val |= L2C_AUX_CTRL_SHARED_OVERRIDE; + *aux_mask &= ~L2C_AUX_CTRL_SHARED_OVERRIDE; + } + prefetch = l2x0_saved_regs.prefetch_ctrl; ret = of_property_read_u32(np, "arm,double-linefill", &val); -- cgit From 8ded1e1a92daa96307e4b84b707fee5993bc6047 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 7 Jul 2015 18:16:15 +0100 Subject: ARM: 8401/1: perf: Set affinity for PPI based PMUs For PPI based PMUs, we bail out early in of_pmu_irq_cfg() without setting the PMU's supported_cpus bitmap. This causes the smp_call_function_any() in armv7_probe_num_events() to fail. Set the bitmap to be all CPUs so that we properly probe PMUs that use PPIs. Fixes: cc88116da0d1 ("arm: perf: treat PMUs as CPU affine") Cc: Mark Rutland Signed-off-by: Stephen Boyd Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/perf_event.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 357f57ea83f4..f3ddd0ff2d8b 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -795,8 +795,10 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu) /* Don't bother with PPIs; they're already affine */ irq = platform_get_irq(pdev, 0); - if (irq >= 0 && irq_is_percpu(irq)) + if (irq >= 0 && irq_is_percpu(irq)) { + cpumask_setall(&pmu->supported_cpus); return 0; + } irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); if (!irqs) -- cgit From 57853e8906a04a86c32fb96d8421b923c6d64162 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 7 Jul 2015 18:19:38 +0100 Subject: ARM: 8403/1: kbuild: don't use generic mcs_spinlock.h header We provide our own implementation of asm/mcs_spinlock.h, so there's no need to ask for the (empty) generic version. Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/include/asm/Kbuild | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 83c50193626c..517ef6dd22b9 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -12,7 +12,6 @@ generic-y += irq_regs.h generic-y += kdebug.h generic-y += local.h generic-y += local64.h -generic-y += mcs_spinlock.h generic-y += msgbuf.h generic-y += param.h generic-y += parport.h -- cgit From ad2daa85bd50b7ff5c851b80a0b813bdc8d14f8e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 10 Jul 2015 15:46:32 +0100 Subject: arm64: entry32: remove pointless register assignment We currently set x27 in compat_sys_sigreturn_wrapper and compat_sys_rt_sigreturn_wrapper, similarly to what we do with r8/why on 32-bit ARM, in an attempt to prevent sigreturns from being restarted. However, on arm64 we have always used pt_regs::syscallno for syscall restarting (for both native and compat tasks), and x27 is never inspected again before being overwritten in kernel_exit. This patch removes the pointless register assignments. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry32.S | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/arm64/kernel/entry32.S b/arch/arm64/kernel/entry32.S index bd9bfaa9269b..f332d5d1f6b4 100644 --- a/arch/arm64/kernel/entry32.S +++ b/arch/arm64/kernel/entry32.S @@ -32,13 +32,11 @@ ENTRY(compat_sys_sigreturn_wrapper) mov x0, sp - mov x27, #0 // prevent syscall restart handling (why) b compat_sys_sigreturn ENDPROC(compat_sys_sigreturn_wrapper) ENTRY(compat_sys_rt_sigreturn_wrapper) mov x0, sp - mov x27, #0 // prevent syscall restart handling (why) b compat_sys_rt_sigreturn ENDPROC(compat_sys_rt_sigreturn_wrapper) -- cgit From dbf3c370862d73fcd2c74ca55e254bb02143238d Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 10 Jul 2015 10:11:07 -0700 Subject: Revert "Input: synaptics - allocate 3 slots to keep stability in image sensors" This reverts commit 63c4fda3c0bb841b1aad1298fc7fe94058fc79f8 as it causes issues with detecting 3-finger taps. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=100481 Cc: stable@vger.kernel.org Acked-by: Benjamin Tissoires --- drivers/input/mouse/synaptics.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 35c8d0ceabee..3a32caf06bf1 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -1199,7 +1199,7 @@ static void set_input_params(struct psmouse *psmouse, ABS_MT_POSITION_Y); /* Image sensors can report per-contact pressure */ input_set_abs_params(dev, ABS_MT_PRESSURE, 0, 255, 0, 0); - input_mt_init_slots(dev, 3, INPUT_MT_POINTER | INPUT_MT_TRACK); + input_mt_init_slots(dev, 2, INPUT_MT_POINTER | INPUT_MT_TRACK); /* Image sensors can signal 4 and 5 finger clicks */ __set_bit(BTN_TOOL_QUADTAP, dev->keybit); -- cgit From b864bc17f1c326783f2388057e15d3e153125ab9 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 10 Jul 2015 11:06:10 -0600 Subject: pmem: add maintainer for include/linux/pmem.h The file include/linux/pmem.h was recently created to hold the PMEM API, and is logically part of the PMEM driver. Add an entry for this file to MAINTAINERS. Signed-off-by: Ross Zwisler Signed-off-by: Dan Williams --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0e6b09150aad..a8306b25c13c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6134,6 +6134,7 @@ L: linux-nvdimm@lists.01.org Q: https://patchwork.kernel.org/project/linux-nvdimm/list/ S: Supported F: drivers/nvdimm/pmem.c +F: include/linux/pmem.h LINUX FOR IBM pSERIES (RS/6000) M: Paul Mackerras -- cgit From b1b2e6235a44174151fa3bb22657f74972635618 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Jul 2015 11:06:11 -0600 Subject: tools/testing/nvdimm: mock ioremap_wt In the 4.2-rc1 merge the default_memremap_pmem() implementation switched from ioremap_nocache() to ioremap_wt(). Add it to the list of mocked routines to restore the ability to run the unit tests. Signed-off-by: Dan Williams --- tools/testing/nvdimm/Kbuild | 1 + tools/testing/nvdimm/test/iomap.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 8e9b64520ec1..8e020601c773 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -1,3 +1,4 @@ +ldflags-y += --wrap=ioremap_wt ldflags-y += --wrap=ioremap_cache ldflags-y += --wrap=ioremap_nocache ldflags-y += --wrap=iounmap diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index c85a6f6ba559..9f21b150396b 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -77,6 +77,12 @@ void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size) } EXPORT_SYMBOL(__wrap_ioremap_nocache); +void __iomem *__wrap_ioremap_wt(resource_size_t offset, unsigned long size) +{ + return __nfit_test_ioremap(offset, size, ioremap_wt); +} +EXPORT_SYMBOL(__wrap_ioremap_wt); + void __wrap_iounmap(volatile void __iomem *addr) { struct nfit_test_resource *nfit_res; -- cgit From f7ec83684af020c961d7fab801f8e3ef7ce5de33 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Jul 2015 11:06:12 -0600 Subject: tools/testing/nvdimm: fix return code for unimplemented commands The implementation for the new "DIMM Flags" DSM relies on the -ENOTTY return code to indicate that the flags are unimplimented and to fall back to a safe default. As is the -ENXIO error code erroneoously indicates to fail enabling a BLK region. Signed-off-by: Dan Williams --- tools/testing/nvdimm/test/nfit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 4b69b8368de0..092d4724fe16 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -155,7 +155,7 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, int i, rc; if (!nfit_mem || !test_bit(cmd, &nfit_mem->dsm_mask)) - return -ENXIO; + return -ENOTTY; /* lookup label space for the given dimm */ for (i = 0; i < ARRAY_SIZE(handle); i++) -- cgit From 9d27a87ec9e1317d368b1e5e3f4808078baa8c4c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Jul 2015 14:07:03 -0400 Subject: tools/testing/nvdimm: add mock acpi_nfit_flush_address entries to nfit_test In preparation for fixing the BLK path to properly use "directed pcommit" enable the unit test infrastructure to emit mock "flush" tables. Writes to these flush addresses trigger a memory controller to flush its internal buffers to persistent media, similar to the x86 "pcommit" instruction. Signed-off-by: Dan Williams --- tools/testing/nvdimm/Kbuild | 2 ++ tools/testing/nvdimm/test/iomap.c | 21 ++++++++++++++++ tools/testing/nvdimm/test/nfit.c | 50 +++++++++++++++++++++++++++++++++++++-- 3 files changed, 71 insertions(+), 2 deletions(-) diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 8e020601c773..f56914c7929b 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -1,4 +1,6 @@ ldflags-y += --wrap=ioremap_wt +ldflags-y += --wrap=ioremap_wc +ldflags-y += --wrap=devm_ioremap_nocache ldflags-y += --wrap=ioremap_cache ldflags-y += --wrap=ioremap_nocache ldflags-y += --wrap=iounmap diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index 9f21b150396b..64bfaa50831c 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -65,6 +65,21 @@ void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size, return fallback_fn(offset, size); } +void __iomem *__wrap_devm_ioremap_nocache(struct device *dev, + resource_size_t offset, unsigned long size) +{ + struct nfit_test_resource *nfit_res; + + rcu_read_lock(); + nfit_res = get_nfit_res(offset); + rcu_read_unlock(); + if (nfit_res) + return (void __iomem *) nfit_res->buf + offset + - nfit_res->res->start; + return devm_ioremap_nocache(dev, offset, size); +} +EXPORT_SYMBOL(__wrap_devm_ioremap_nocache); + void __iomem *__wrap_ioremap_cache(resource_size_t offset, unsigned long size) { return __nfit_test_ioremap(offset, size, ioremap_cache); @@ -83,6 +98,12 @@ void __iomem *__wrap_ioremap_wt(resource_size_t offset, unsigned long size) } EXPORT_SYMBOL(__wrap_ioremap_wt); +void __iomem *__wrap_ioremap_wc(resource_size_t offset, unsigned long size) +{ + return __nfit_test_ioremap(offset, size, ioremap_wc); +} +EXPORT_SYMBOL(__wrap_ioremap_wc); + void __wrap_iounmap(volatile void __iomem *addr) { struct nfit_test_resource *nfit_res; diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index 092d4724fe16..d0bdae40ccc9 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -128,6 +128,8 @@ struct nfit_test { int num_pm; void **dimm; dma_addr_t *dimm_dma; + void **flush; + dma_addr_t *flush_dma; void **label; dma_addr_t *label_dma; void **spa_set; @@ -331,7 +333,8 @@ static int nfit_test0_alloc(struct nfit_test *t) + sizeof(struct acpi_nfit_system_address) * NUM_SPA + sizeof(struct acpi_nfit_memory_map) * NUM_MEM + sizeof(struct acpi_nfit_control_region) * NUM_DCR - + sizeof(struct acpi_nfit_data_region) * NUM_BDW; + + sizeof(struct acpi_nfit_data_region) * NUM_BDW + + sizeof(struct acpi_nfit_flush_address) * NUM_DCR; int i; t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); @@ -356,6 +359,10 @@ static int nfit_test0_alloc(struct nfit_test *t) if (!t->label[i]) return -ENOMEM; sprintf(t->label[i], "label%d", i); + + t->flush[i] = test_alloc(t, 8, &t->flush_dma[i]); + if (!t->flush[i]) + return -ENOMEM; } for (i = 0; i < NUM_DCR; i++) { @@ -408,6 +415,7 @@ static void nfit_test0_setup(struct nfit_test *t) struct acpi_nfit_system_address *spa; struct acpi_nfit_control_region *dcr; struct acpi_nfit_data_region *bdw; + struct acpi_nfit_flush_address *flush; unsigned int offset; nfit_test_init_header(nfit_buf, size); @@ -831,6 +839,39 @@ static void nfit_test0_setup(struct nfit_test *t) bdw->capacity = DIMM_SIZE; bdw->start_address = 0; + offset = offset + sizeof(struct acpi_nfit_data_region) * 4; + /* flush0 (dimm0) */ + flush = nfit_buf + offset; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[0]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[0]; + + /* flush1 (dimm1) */ + flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 1; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[1]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[1]; + + /* flush2 (dimm2) */ + flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 2; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[2]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[2]; + + /* flush3 (dimm3) */ + flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 3; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[3]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[3]; + acpi_desc = &t->acpi_desc; set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en); set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); @@ -933,6 +974,10 @@ static int nfit_test_probe(struct platform_device *pdev) GFP_KERNEL); nfit_test->dimm_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t), GFP_KERNEL); + nfit_test->flush = devm_kcalloc(dev, num, sizeof(void *), + GFP_KERNEL); + nfit_test->flush_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t), + GFP_KERNEL); nfit_test->label = devm_kcalloc(dev, num, sizeof(void *), GFP_KERNEL); nfit_test->label_dma = devm_kcalloc(dev, num, @@ -943,7 +988,8 @@ static int nfit_test_probe(struct platform_device *pdev) sizeof(dma_addr_t), GFP_KERNEL); if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label && nfit_test->label_dma && nfit_test->dcr - && nfit_test->dcr_dma) + && nfit_test->dcr_dma && nfit_test->flush + && nfit_test->flush_dma) /* pass */; else return -ENOMEM; -- cgit From c2ad29540cb913bd9e526fae77c35c7fb45f24a3 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 10 Jul 2015 11:06:13 -0600 Subject: nfit: update block I/O path to use PMEM API Update the nfit block I/O path to use the new PMEM API and to adhere to the read/write flows outlined in the "NVDIMM Block Window Driver Writer's Guide": http://pmem.io/documents/NVDIMM_Driver_Writers_Guide.pdf This includes adding support for targeted NVDIMM flushes called "flush hints" in the ACPI 6.0 specification: http://www.uefi.org/sites/default/files/resources/ACPI_6.0.pdf For performance and media durability the mapping for a BLK aperture is moved to a write-combining mapping which is consistent with memcpy_to_pmem() and wmb_blk(). Signed-off-by: Ross Zwisler Acked-by: Rafael J. Wysocki Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++------ drivers/acpi/nfit.h | 15 ++++++++- 2 files changed, 100 insertions(+), 12 deletions(-) diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index a20b7c883ca0..11aa513f2fda 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "nfit.h" @@ -305,6 +306,23 @@ static bool add_idt(struct acpi_nfit_desc *acpi_desc, return true; } +static bool add_flush(struct acpi_nfit_desc *acpi_desc, + struct acpi_nfit_flush_address *flush) +{ + struct device *dev = acpi_desc->dev; + struct nfit_flush *nfit_flush = devm_kzalloc(dev, sizeof(*nfit_flush), + GFP_KERNEL); + + if (!nfit_flush) + return false; + INIT_LIST_HEAD(&nfit_flush->list); + nfit_flush->flush = flush; + list_add_tail(&nfit_flush->list, &acpi_desc->flushes); + dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__, + flush->device_handle, flush->hint_count); + return true; +} + static void *add_table(struct acpi_nfit_desc *acpi_desc, void *table, const void *end) { @@ -338,7 +356,8 @@ static void *add_table(struct acpi_nfit_desc *acpi_desc, void *table, return err; break; case ACPI_NFIT_TYPE_FLUSH_ADDRESS: - dev_dbg(dev, "%s: flush\n", __func__); + if (!add_flush(acpi_desc, table)) + return err; break; case ACPI_NFIT_TYPE_SMBIOS: dev_dbg(dev, "%s: smbios\n", __func__); @@ -389,6 +408,7 @@ static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc, { u16 dcr = __to_nfit_memdev(nfit_mem)->region_index; struct nfit_memdev *nfit_memdev; + struct nfit_flush *nfit_flush; struct nfit_dcr *nfit_dcr; struct nfit_bdw *nfit_bdw; struct nfit_idt *nfit_idt; @@ -442,6 +462,14 @@ static int nfit_mem_add(struct acpi_nfit_desc *acpi_desc, nfit_mem->idt_bdw = nfit_idt->idt; break; } + + list_for_each_entry(nfit_flush, &acpi_desc->flushes, list) { + if (nfit_flush->flush->device_handle != + nfit_memdev->memdev->device_handle) + continue; + nfit_mem->nfit_flush = nfit_flush; + break; + } break; } @@ -978,6 +1006,24 @@ static u64 to_interleave_offset(u64 offset, struct nfit_blk_mmio *mmio) return mmio->base_offset + line_offset + table_offset + sub_line_offset; } +static void wmb_blk(struct nfit_blk *nfit_blk) +{ + + if (nfit_blk->nvdimm_flush) { + /* + * The first wmb() is needed to 'sfence' all previous writes + * such that they are architecturally visible for the platform + * buffer flush. Note that we've already arranged for pmem + * writes to avoid the cache via arch_memcpy_to_pmem(). The + * final wmb() ensures ordering for the NVDIMM flush write. + */ + wmb(); + writeq(1, nfit_blk->nvdimm_flush); + wmb(); + } else + wmb_pmem(); +} + static u64 read_blk_stat(struct nfit_blk *nfit_blk, unsigned int bw) { struct nfit_blk_mmio *mmio = &nfit_blk->mmio[DCR]; @@ -1012,6 +1058,7 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, offset = to_interleave_offset(offset, mmio); writeq(cmd, mmio->base + offset); + wmb_blk(nfit_blk); /* FIXME: conditionally perform read-back if mandated by firmware */ } @@ -1026,7 +1073,6 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, base_offset = nfit_blk->bdw_offset + dpa % L1_CACHE_BYTES + lane * mmio->size; - /* TODO: non-temporal access, flush hints, cache management etc... */ write_blk_ctl(nfit_blk, lane, dpa, len, rw); while (len) { unsigned int c; @@ -1045,13 +1091,19 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, } if (rw) - memcpy(mmio->aperture + offset, iobuf + copied, c); + memcpy_to_pmem(mmio->aperture + offset, + iobuf + copied, c); else - memcpy(iobuf + copied, mmio->aperture + offset, c); + memcpy_from_pmem(iobuf + copied, + mmio->aperture + offset, c); copied += c; len -= c; } + + if (rw) + wmb_blk(nfit_blk); + rc = read_blk_stat(nfit_blk, lane) ? -EIO : 0; return rc; } @@ -1124,7 +1176,7 @@ static void nfit_spa_unmap(struct acpi_nfit_desc *acpi_desc, } static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, - struct acpi_nfit_system_address *spa) + struct acpi_nfit_system_address *spa, enum spa_map_type type) { resource_size_t start = spa->address; resource_size_t n = spa->length; @@ -1152,8 +1204,15 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, if (!res) goto err_mem; - /* TODO: cacheability based on the spa type */ - spa_map->iomem = ioremap_nocache(start, n); + if (type == SPA_MAP_APERTURE) { + /* + * TODO: memremap_pmem() support, but that requires cache + * flushing when the aperture is moved. + */ + spa_map->iomem = ioremap_wc(start, n); + } else + spa_map->iomem = ioremap_nocache(start, n); + if (!spa_map->iomem) goto err_map; @@ -1171,6 +1230,7 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, * nfit_spa_map - interleave-aware managed-mappings of acpi_nfit_system_address ranges * @nvdimm_bus: NFIT-bus that provided the spa table entry * @nfit_spa: spa table to map + * @type: aperture or control region * * In the case where block-data-window apertures and * dimm-control-regions are interleaved they will end up sharing a @@ -1180,12 +1240,12 @@ static void __iomem *__nfit_spa_map(struct acpi_nfit_desc *acpi_desc, * unbound. */ static void __iomem *nfit_spa_map(struct acpi_nfit_desc *acpi_desc, - struct acpi_nfit_system_address *spa) + struct acpi_nfit_system_address *spa, enum spa_map_type type) { void __iomem *iomem; mutex_lock(&acpi_desc->spa_map_mutex); - iomem = __nfit_spa_map(acpi_desc, spa); + iomem = __nfit_spa_map(acpi_desc, spa, type); mutex_unlock(&acpi_desc->spa_map_mutex); return iomem; @@ -1212,6 +1272,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, struct nvdimm_bus_descriptor *nd_desc = to_nd_desc(nvdimm_bus); struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); struct nd_blk_region *ndbr = to_nd_blk_region(dev); + struct nfit_flush *nfit_flush; struct nfit_blk_mmio *mmio; struct nfit_blk *nfit_blk; struct nfit_mem *nfit_mem; @@ -1237,7 +1298,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, /* map block aperture memory */ nfit_blk->bdw_offset = nfit_mem->bdw->offset; mmio = &nfit_blk->mmio[BDW]; - mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw); + mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_bdw, + SPA_MAP_APERTURE); if (!mmio->base) { dev_dbg(dev, "%s: %s failed to map bdw\n", __func__, nvdimm_name(nvdimm)); @@ -1259,7 +1321,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, nfit_blk->cmd_offset = nfit_mem->dcr->command_offset; nfit_blk->stat_offset = nfit_mem->dcr->status_offset; mmio = &nfit_blk->mmio[DCR]; - mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr); + mmio->base = nfit_spa_map(acpi_desc, nfit_mem->spa_dcr, + SPA_MAP_CONTROL); if (!mmio->base) { dev_dbg(dev, "%s: %s failed to map dcr\n", __func__, nvdimm_name(nvdimm)); @@ -1277,6 +1340,17 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, return rc; } + nfit_flush = nfit_mem->nfit_flush; + if (nfit_flush && nfit_flush->flush->hint_count != 0) { + nfit_blk->nvdimm_flush = devm_ioremap_nocache(dev, + nfit_flush->flush->hint_address[0], 8); + if (!nfit_blk->nvdimm_flush) + return -ENOMEM; + } + + if (!arch_has_pmem_api() && !nfit_blk->nvdimm_flush) + dev_warn(dev, "unable to guarantee persistence of writes\n"); + if (mmio->line_size == 0) return 0; @@ -1459,6 +1533,7 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz) INIT_LIST_HEAD(&acpi_desc->dcrs); INIT_LIST_HEAD(&acpi_desc->bdws); INIT_LIST_HEAD(&acpi_desc->idts); + INIT_LIST_HEAD(&acpi_desc->flushes); INIT_LIST_HEAD(&acpi_desc->memdevs); INIT_LIST_HEAD(&acpi_desc->dimms); mutex_init(&acpi_desc->spa_map_mutex); diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 81f2e8c5a79c..815cb561cdba 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -60,6 +60,11 @@ struct nfit_idt { struct list_head list; }; +struct nfit_flush { + struct acpi_nfit_flush_address *flush; + struct list_head list; +}; + struct nfit_memdev { struct acpi_nfit_memory_map *memdev; struct list_head list; @@ -77,6 +82,7 @@ struct nfit_mem { struct acpi_nfit_system_address *spa_bdw; struct acpi_nfit_interleave *idt_dcr; struct acpi_nfit_interleave *idt_bdw; + struct nfit_flush *nfit_flush; struct list_head list; struct acpi_device *adev; unsigned long dsm_mask; @@ -88,6 +94,7 @@ struct acpi_nfit_desc { struct mutex spa_map_mutex; struct list_head spa_maps; struct list_head memdevs; + struct list_head flushes; struct list_head dimms; struct list_head spas; struct list_head dcrs; @@ -109,7 +116,7 @@ struct nfit_blk { struct nfit_blk_mmio { union { void __iomem *base; - void *aperture; + void __pmem *aperture; }; u64 size; u64 base_offset; @@ -123,6 +130,12 @@ struct nfit_blk { u64 bdw_offset; /* post interleave offset */ u64 stat_offset; u64 cmd_offset; + void __iomem *nvdimm_flush; +}; + +enum spa_map_type { + SPA_MAP_CONTROL, + SPA_MAP_APERTURE, }; struct nfit_spa_mapping { -- cgit From f0f2c072cf530d5b8890be5051cc8b36b0c54cce Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 10 Jul 2015 11:06:14 -0600 Subject: nfit: add support for NVDIMM "latch" flag Add support in the NFIT BLK I/O path for the "latch" flag defined in the "Get Block NVDIMM Flags" _DSM function: http://pmem.io/documents/NVDIMM_DSM_Interface_Example.pdf This flag requires the driver to read back the command register after it is written in the block I/O path. This ensures that the hardware has fully processed the new command and moved the aperture appropriately. Signed-off-by: Ross Zwisler Acked-by: Rafael J. Wysocki Signed-off-by: Dan Williams --- drivers/acpi/nfit.c | 33 ++++++++++++++++++++++++++++++++- drivers/acpi/nfit.h | 5 +++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 11aa513f2fda..628a42c41ab1 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -1059,7 +1059,9 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, writeq(cmd, mmio->base + offset); wmb_blk(nfit_blk); - /* FIXME: conditionally perform read-back if mandated by firmware */ + + if (nfit_blk->dimm_flags & ND_BLK_DCR_LATCH) + readq(mmio->base + offset); } static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, @@ -1266,6 +1268,28 @@ static int nfit_blk_init_interleave(struct nfit_blk_mmio *mmio, return 0; } +static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, struct nfit_blk *nfit_blk) +{ + struct nd_cmd_dimm_flags flags; + int rc; + + memset(&flags, 0, sizeof(flags)); + rc = nd_desc->ndctl(nd_desc, nvdimm, ND_CMD_DIMM_FLAGS, &flags, + sizeof(flags)); + + if (rc >= 0 && flags.status == 0) + nfit_blk->dimm_flags = flags.flags; + else if (rc == -ENOTTY) { + /* fall back to a conservative default */ + nfit_blk->dimm_flags = ND_BLK_DCR_LATCH; + rc = 0; + } else + rc = -ENXIO; + + return rc; +} + static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, struct device *dev) { @@ -1340,6 +1364,13 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, return rc; } + rc = acpi_nfit_blk_get_flags(nd_desc, nvdimm, nfit_blk); + if (rc < 0) { + dev_dbg(dev, "%s: %s failed get DIMM flags\n", + __func__, nvdimm_name(nvdimm)); + return rc; + } + nfit_flush = nfit_mem->nfit_flush; if (nfit_flush && nfit_flush->flush->hint_count != 0) { nfit_blk->nvdimm_flush = devm_ioremap_nocache(dev, diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 815cb561cdba..79b6d83875c1 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -40,6 +40,10 @@ enum nfit_uuids { NFIT_UUID_MAX, }; +enum { + ND_BLK_DCR_LATCH = 2, +}; + struct nfit_spa { struct acpi_nfit_system_address *spa; struct list_head list; @@ -131,6 +135,7 @@ struct nfit_blk { u64 stat_offset; u64 cmd_offset; void __iomem *nvdimm_flush; + u32 dimm_flags; }; enum spa_map_type { -- cgit From cb908ed3495496b9973a2b9ed1a60f43933fdf01 Mon Sep 17 00:00:00 2001 From: Alex Ivanov Date: Mon, 15 Jun 2015 08:50:45 +0300 Subject: stifb: Implement hardware accelerated copyarea This patch adds hardware assisted scrolling. The code is based upon the following investigation: https://parisc.wiki.kernel.org/index.php/NGLE#Blitter A simple 'time ls -la /usr/bin' test shows 1.6x speed increase over soft copy and 2.3x increase over FBINFO_READS_FAST (prefer soft copy over screen redraw) on Artist framebuffer. Signed-off-by: Alex Ivanov Signed-off-by: Helge Deller --- drivers/video/fbdev/stifb.c | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c index 86621fabbb8b..735355b0e023 100644 --- a/drivers/video/fbdev/stifb.c +++ b/drivers/video/fbdev/stifb.c @@ -121,6 +121,7 @@ static int __initdata stifb_bpp_pref[MAX_STI_ROMS]; #define REG_3 0x0004a0 #define REG_4 0x000600 #define REG_6 0x000800 +#define REG_7 0x000804 #define REG_8 0x000820 #define REG_9 0x000a04 #define REG_10 0x018000 @@ -135,6 +136,8 @@ static int __initdata stifb_bpp_pref[MAX_STI_ROMS]; #define REG_21 0x200218 #define REG_22 0x0005a0 #define REG_23 0x0005c0 +#define REG_24 0x000808 +#define REG_25 0x000b00 #define REG_26 0x200118 #define REG_27 0x200308 #define REG_32 0x21003c @@ -429,6 +432,9 @@ ARTIST_ENABLE_DISABLE_DISPLAY(struct stifb_info *fb, int enable) #define SET_LENXY_START_RECFILL(fb, lenxy) \ WRITE_WORD(lenxy, fb, REG_9) +#define SETUP_COPYAREA(fb) \ + WRITE_BYTE(0, fb, REG_16b1) + static void HYPER_ENABLE_DISABLE_DISPLAY(struct stifb_info *fb, int enable) { @@ -1004,6 +1010,36 @@ stifb_blank(int blank_mode, struct fb_info *info) return 0; } +static void +stifb_copyarea(struct fb_info *info, const struct fb_copyarea *area) +{ + struct stifb_info *fb = container_of(info, struct stifb_info, info); + + SETUP_COPYAREA(fb); + + SETUP_HW(fb); + if (fb->info.var.bits_per_pixel == 32) { + WRITE_WORD(0xBBA0A000, fb, REG_10); + + NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xffffffff); + } else { + WRITE_WORD(fb->id == S9000_ID_HCRX ? 0x13a02000 : 0x13a01000, fb, REG_10); + + NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xff); + } + + NGLE_QUICK_SET_IMAGE_BITMAP_OP(fb, + IBOvals(RopSrc, MaskAddrOffset(0), + BitmapExtent08, StaticReg(1), + DataDynamic, MaskOtc, BGx(0), FGx(0))); + + WRITE_WORD(((area->sx << 16) | area->sy), fb, REG_24); + WRITE_WORD(((area->width << 16) | area->height), fb, REG_7); + WRITE_WORD(((area->dx << 16) | area->dy), fb, REG_25); + + SETUP_FB(fb); +} + static void __init stifb_init_display(struct stifb_info *fb) { @@ -1069,7 +1105,7 @@ static struct fb_ops stifb_ops = { .fb_setcolreg = stifb_setcolreg, .fb_blank = stifb_blank, .fb_fillrect = cfb_fillrect, - .fb_copyarea = cfb_copyarea, + .fb_copyarea = stifb_copyarea, .fb_imageblit = cfb_imageblit, }; @@ -1258,7 +1294,7 @@ static int __init stifb_init_fb(struct sti_struct *sti, int bpp_pref) info->fbops = &stifb_ops; info->screen_base = ioremap_nocache(REGION_BASE(fb,1), fix->smem_len); info->screen_size = fix->smem_len; - info->flags = FBINFO_DEFAULT; + info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_COPYAREA; info->pseudo_palette = &fb->pseudo_palette; /* This has to be done !!! */ -- cgit From 01ab60570427caa24b9debc369e452e86cd9beb4 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Wed, 1 Jul 2015 17:18:37 -0400 Subject: parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results The increased use of pdtlb/pitlb instructions seemed to increase the frequency of random segmentation faults building packages. Further, we had a number of cases where TLB inserts would repeatedly fail and all forward progress would stop. The Haskell ghc package caused a lot of trouble in this area. The final indication of a race in pte handling was this syslog entry on sibaris (C8000): swap_free: Unused swap offset entry 00000004 BUG: Bad page map in process mysqld pte:00000100 pmd:019bbec5 addr:00000000ec464000 vm_flags:00100073 anon_vma:0000000221023828 mapping: (null) index:ec464 CPU: 1 PID: 9176 Comm: mysqld Not tainted 4.0.0-2-parisc64-smp #1 Debian 4.0.5-1 Backtrace: [<0000000040173eb0>] show_stack+0x20/0x38 [<0000000040444424>] dump_stack+0x9c/0x110 [<00000000402a0d38>] print_bad_pte+0x1a8/0x278 [<00000000402a28b8>] unmap_single_vma+0x3d8/0x770 [<00000000402a4090>] zap_page_range+0xf0/0x198 [<00000000402ba2a4>] SyS_madvise+0x404/0x8c0 Note that the pte value is 0 except for the accessed bit 0x100. This bit shouldn't be set without the present bit. It should be noted that the madvise system call is probably a trigger for many of the random segmentation faults. In looking at the kernel code, I found the following problems: 1) The pte_clear define didn't take TLB lock when clearing a pte. 2) We didn't test pte present bit inside lock in exception support. 3) The pte and tlb locks needed to merged in order to ensure consistency between page table and TLB. This also has the effect of serializing TLB broadcasts on SMP systems. The attached change implements the above and a few other tweaks to try to improve performance. Based on the timing code, TLB purges are very slow (e.g., ~ 209 cycles per page on rp3440). Thus, I think it beneficial to test the split_tlb variable to avoid duplicate purges. Probably, all PA 2.0 machines have combined TLBs. I dropped using __flush_tlb_range in flush_tlb_mm as I realized all applications and most threads have a stack size that is too large to make this useful. I added some comments to this effect. Since implementing 1 through 3, I haven't had any random segmentation faults on mx3210 (rp3440) in about one week of building code and running as a Debian buildd. Signed-off-by: John David Anglin Cc: stable@vger.kernel.org # v3.18+ Signed-off-by: Helge Deller --- arch/parisc/include/asm/pgtable.h | 55 +++++++++---- arch/parisc/include/asm/tlbflush.h | 53 ++++++------ arch/parisc/kernel/cache.c | 105 +++++++++++++++--------- arch/parisc/kernel/entry.S | 163 ++++++++++++++++++------------------- arch/parisc/kernel/traps.c | 4 - 5 files changed, 212 insertions(+), 168 deletions(-) diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 0a183756d6ec..f93c4a4e6580 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -16,7 +16,7 @@ #include #include -extern spinlock_t pa_dbit_lock; +extern spinlock_t pa_tlb_lock; /* * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel @@ -33,6 +33,19 @@ extern spinlock_t pa_dbit_lock; */ #define kern_addr_valid(addr) (1) +/* Purge data and instruction TLB entries. Must be called holding + * the pa_tlb_lock. The TLB purge instructions are slow on SMP + * machines since the purge must be broadcast to all CPUs. + */ + +static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) +{ + mtsp(mm->context, 1); + pdtlb(addr); + if (unlikely(split_tlb)) + pitlb(addr); +} + /* Certain architectures need to do special things when PTEs * within a page table are directly modified. Thus, the following * hook is made available. @@ -42,15 +55,20 @@ extern spinlock_t pa_dbit_lock; *(pteptr) = (pteval); \ } while(0) -extern void purge_tlb_entries(struct mm_struct *, unsigned long); +#define pte_inserted(x) \ + ((pte_val(x) & (_PAGE_PRESENT|_PAGE_ACCESSED)) \ + == (_PAGE_PRESENT|_PAGE_ACCESSED)) -#define set_pte_at(mm, addr, ptep, pteval) \ - do { \ +#define set_pte_at(mm, addr, ptep, pteval) \ + do { \ + pte_t old_pte; \ unsigned long flags; \ - spin_lock_irqsave(&pa_dbit_lock, flags); \ - set_pte(ptep, pteval); \ - purge_tlb_entries(mm, addr); \ - spin_unlock_irqrestore(&pa_dbit_lock, flags); \ + spin_lock_irqsave(&pa_tlb_lock, flags); \ + old_pte = *ptep; \ + set_pte(ptep, pteval); \ + if (pte_inserted(old_pte)) \ + purge_tlb_entries(mm, addr); \ + spin_unlock_irqrestore(&pa_tlb_lock, flags); \ } while (0) #endif /* !__ASSEMBLY__ */ @@ -268,7 +286,7 @@ extern unsigned long *empty_zero_page; #define pte_none(x) (pte_val(x) == 0) #define pte_present(x) (pte_val(x) & _PAGE_PRESENT) -#define pte_clear(mm,addr,xp) do { pte_val(*(xp)) = 0; } while (0) +#define pte_clear(mm, addr, xp) set_pte_at(mm, addr, xp, __pte(0)) #define pmd_flag(x) (pmd_val(x) & PxD_FLAG_MASK) #define pmd_address(x) ((unsigned long)(pmd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT) @@ -435,15 +453,15 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned if (!pte_young(*ptep)) return 0; - spin_lock_irqsave(&pa_dbit_lock, flags); + spin_lock_irqsave(&pa_tlb_lock, flags); pte = *ptep; if (!pte_young(pte)) { - spin_unlock_irqrestore(&pa_dbit_lock, flags); + spin_unlock_irqrestore(&pa_tlb_lock, flags); return 0; } set_pte(ptep, pte_mkold(pte)); purge_tlb_entries(vma->vm_mm, addr); - spin_unlock_irqrestore(&pa_dbit_lock, flags); + spin_unlock_irqrestore(&pa_tlb_lock, flags); return 1; } @@ -453,11 +471,12 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t old_pte; unsigned long flags; - spin_lock_irqsave(&pa_dbit_lock, flags); + spin_lock_irqsave(&pa_tlb_lock, flags); old_pte = *ptep; - pte_clear(mm,addr,ptep); - purge_tlb_entries(mm, addr); - spin_unlock_irqrestore(&pa_dbit_lock, flags); + set_pte(ptep, __pte(0)); + if (pte_inserted(old_pte)) + purge_tlb_entries(mm, addr); + spin_unlock_irqrestore(&pa_tlb_lock, flags); return old_pte; } @@ -465,10 +484,10 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { unsigned long flags; - spin_lock_irqsave(&pa_dbit_lock, flags); + spin_lock_irqsave(&pa_tlb_lock, flags); set_pte(ptep, pte_wrprotect(*ptep)); purge_tlb_entries(mm, addr); - spin_unlock_irqrestore(&pa_dbit_lock, flags); + spin_unlock_irqrestore(&pa_tlb_lock, flags); } #define pte_same(A,B) (pte_val(A) == pte_val(B)) diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h index 9d086a599fa0..e84b96478193 100644 --- a/arch/parisc/include/asm/tlbflush.h +++ b/arch/parisc/include/asm/tlbflush.h @@ -13,6 +13,9 @@ * active at any one time on the Merced bus. This tlb purge * synchronisation is fairly lightweight and harmless so we activate * it on all systems not just the N class. + + * It is also used to ensure PTE updates are atomic and consistent + * with the TLB. */ extern spinlock_t pa_tlb_lock; @@ -24,20 +27,24 @@ extern void flush_tlb_all_local(void *); #define smp_flush_tlb_all() flush_tlb_all() +int __flush_tlb_range(unsigned long sid, + unsigned long start, unsigned long end); + +#define flush_tlb_range(vma, start, end) \ + __flush_tlb_range((vma)->vm_mm->context, start, end) + +#define flush_tlb_kernel_range(start, end) \ + __flush_tlb_range(0, start, end) + /* * flush_tlb_mm() * - * XXX This code is NOT valid for HP-UX compatibility processes, - * (although it will probably work 99% of the time). HP-UX - * processes are free to play with the space id's and save them - * over long periods of time, etc. so we have to preserve the - * space and just flush the entire tlb. We need to check the - * personality in order to do that, but the personality is not - * currently being set correctly. - * - * Of course, Linux processes could do the same thing, but - * we don't support that (and the compilers, dynamic linker, - * etc. do not do that). + * The code to switch to a new context is NOT valid for processes + * which play with the space id's. Thus, we have to preserve the + * space and just flush the entire tlb. However, the compilers, + * dynamic linker, etc, do not manipulate space id's, so there + * could be a significant performance benefit in switching contexts + * and not flushing the whole tlb. */ static inline void flush_tlb_mm(struct mm_struct *mm) @@ -45,10 +52,18 @@ static inline void flush_tlb_mm(struct mm_struct *mm) BUG_ON(mm == &init_mm); /* Should never happen */ #if 1 || defined(CONFIG_SMP) + /* Except for very small threads, flushing the whole TLB is + * faster than using __flush_tlb_range. The pdtlb and pitlb + * instructions are very slow because of the TLB broadcast. + * It might be faster to do local range flushes on all CPUs + * on PA 2.0 systems. + */ flush_tlb_all(); #else /* FIXME: currently broken, causing space id and protection ids - * to go out of sync, resulting in faults on userspace accesses. + * to go out of sync, resulting in faults on userspace accesses. + * This approach needs further investigation since running many + * small applications (e.g., GCC testsuite) is faster on HP-UX. */ if (mm) { if (mm->context != 0) @@ -65,22 +80,12 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, { unsigned long flags, sid; - /* For one page, it's not worth testing the split_tlb variable */ - - mb(); sid = vma->vm_mm->context; purge_tlb_start(flags); mtsp(sid, 1); pdtlb(addr); - pitlb(addr); + if (unlikely(split_tlb)) + pitlb(addr); purge_tlb_end(flags); } - -void __flush_tlb_range(unsigned long sid, - unsigned long start, unsigned long end); - -#define flush_tlb_range(vma,start,end) __flush_tlb_range((vma)->vm_mm->context,start,end) - -#define flush_tlb_kernel_range(start, end) __flush_tlb_range(0,start,end) - #endif diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index f6448c7c62b5..cda6dbbe9842 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -342,12 +342,15 @@ EXPORT_SYMBOL(flush_data_cache_local); EXPORT_SYMBOL(flush_kernel_icache_range_asm); #define FLUSH_THRESHOLD 0x80000 /* 0.5MB */ -int parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD; +static unsigned long parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD; + +#define FLUSH_TLB_THRESHOLD (2*1024*1024) /* 2MB initial TLB threshold */ +static unsigned long parisc_tlb_flush_threshold __read_mostly = FLUSH_TLB_THRESHOLD; void __init parisc_setup_cache_timing(void) { unsigned long rangetime, alltime; - unsigned long size; + unsigned long size, start; alltime = mfctl(16); flush_data_cache(); @@ -364,14 +367,43 @@ void __init parisc_setup_cache_timing(void) /* Racy, but if we see an intermediate value, it's ok too... */ parisc_cache_flush_threshold = size * alltime / rangetime; - parisc_cache_flush_threshold = (parisc_cache_flush_threshold + L1_CACHE_BYTES - 1) &~ (L1_CACHE_BYTES - 1); + parisc_cache_flush_threshold = L1_CACHE_ALIGN(parisc_cache_flush_threshold); if (!parisc_cache_flush_threshold) parisc_cache_flush_threshold = FLUSH_THRESHOLD; if (parisc_cache_flush_threshold > cache_info.dc_size) parisc_cache_flush_threshold = cache_info.dc_size; - printk(KERN_INFO "Setting cache flush threshold to %x (%d CPUs online)\n", parisc_cache_flush_threshold, num_online_cpus()); + printk(KERN_INFO "Setting cache flush threshold to %lu kB\n", + parisc_cache_flush_threshold/1024); + + /* calculate TLB flush threshold */ + + alltime = mfctl(16); + flush_tlb_all(); + alltime = mfctl(16) - alltime; + + size = PAGE_SIZE; + start = (unsigned long) _text; + rangetime = mfctl(16); + while (start < (unsigned long) _end) { + flush_tlb_kernel_range(start, start + PAGE_SIZE); + start += PAGE_SIZE; + size += PAGE_SIZE; + } + rangetime = mfctl(16) - rangetime; + + printk(KERN_DEBUG "Whole TLB flush %lu cycles, flushing %lu bytes %lu cycles\n", + alltime, size, rangetime); + + parisc_tlb_flush_threshold = size * alltime / rangetime; + parisc_tlb_flush_threshold *= num_online_cpus(); + parisc_tlb_flush_threshold = PAGE_ALIGN(parisc_tlb_flush_threshold); + if (!parisc_tlb_flush_threshold) + parisc_tlb_flush_threshold = FLUSH_TLB_THRESHOLD; + + printk(KERN_INFO "Setting TLB flush threshold to %lu kB\n", + parisc_tlb_flush_threshold/1024); } extern void purge_kernel_dcache_page_asm(unsigned long); @@ -403,48 +435,45 @@ void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, } EXPORT_SYMBOL(copy_user_page); -void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) -{ - unsigned long flags; - - /* Note: purge_tlb_entries can be called at startup with - no context. */ - - purge_tlb_start(flags); - mtsp(mm->context, 1); - pdtlb(addr); - pitlb(addr); - purge_tlb_end(flags); -} -EXPORT_SYMBOL(purge_tlb_entries); - -void __flush_tlb_range(unsigned long sid, unsigned long start, - unsigned long end) +/* __flush_tlb_range() + * + * returns 1 if all TLBs were flushed. + */ +int __flush_tlb_range(unsigned long sid, unsigned long start, + unsigned long end) { - unsigned long npages; + unsigned long flags, size; - npages = ((end - (start & PAGE_MASK)) + (PAGE_SIZE - 1)) >> PAGE_SHIFT; - if (npages >= 512) /* 2MB of space: arbitrary, should be tuned */ + size = (end - start); + if (size >= parisc_tlb_flush_threshold) { flush_tlb_all(); - else { - unsigned long flags; + return 1; + } + /* Purge TLB entries for small ranges using the pdtlb and + pitlb instructions. These instructions execute locally + but cause a purge request to be broadcast to other TLBs. */ + if (likely(!split_tlb)) { + while (start < end) { + purge_tlb_start(flags); + mtsp(sid, 1); + pdtlb(start); + purge_tlb_end(flags); + start += PAGE_SIZE; + } + return 0; + } + + /* split TLB case */ + while (start < end) { purge_tlb_start(flags); mtsp(sid, 1); - if (split_tlb) { - while (npages--) { - pdtlb(start); - pitlb(start); - start += PAGE_SIZE; - } - } else { - while (npages--) { - pdtlb(start); - start += PAGE_SIZE; - } - } + pdtlb(start); + pitlb(start); purge_tlb_end(flags); + start += PAGE_SIZE; } + return 0; } static void cacheflush_h_tmp_function(void *dummy) diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 75819617f93b..c5ef4081b01d 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -45,7 +45,7 @@ .level 2.0 #endif - .import pa_dbit_lock,data + .import pa_tlb_lock,data /* space_to_prot macro creates a prot id from a space id */ @@ -420,8 +420,8 @@ SHLREG %r9,PxD_VALUE_SHIFT,\pmd extru \va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index dep %r0,31,PAGE_SHIFT,\pmd /* clear offset */ - shladd \index,BITS_PER_PTE_ENTRY,\pmd,\pmd - LDREG %r0(\pmd),\pte /* pmd is now pte */ + shladd \index,BITS_PER_PTE_ENTRY,\pmd,\pmd /* pmd is now pte */ + LDREG %r0(\pmd),\pte bb,>=,n \pte,_PAGE_PRESENT_BIT,\fault .endm @@ -453,57 +453,53 @@ L2_ptep \pgd,\pte,\index,\va,\fault .endm - /* Acquire pa_dbit_lock lock. */ - .macro dbit_lock spc,tmp,tmp1 + /* Acquire pa_tlb_lock lock and recheck page is still present. */ + .macro tlb_lock spc,ptp,pte,tmp,tmp1,fault #ifdef CONFIG_SMP cmpib,COND(=),n 0,\spc,2f - load32 PA(pa_dbit_lock),\tmp + load32 PA(pa_tlb_lock),\tmp 1: LDCW 0(\tmp),\tmp1 cmpib,COND(=) 0,\tmp1,1b nop + LDREG 0(\ptp),\pte + bb,<,n \pte,_PAGE_PRESENT_BIT,2f + b \fault + stw \spc,0(\tmp) 2: #endif .endm - /* Release pa_dbit_lock lock without reloading lock address. */ - .macro dbit_unlock0 spc,tmp + /* Release pa_tlb_lock lock without reloading lock address. */ + .macro tlb_unlock0 spc,tmp #ifdef CONFIG_SMP or,COND(=) %r0,\spc,%r0 stw \spc,0(\tmp) #endif .endm - /* Release pa_dbit_lock lock. */ - .macro dbit_unlock1 spc,tmp + /* Release pa_tlb_lock lock. */ + .macro tlb_unlock1 spc,tmp #ifdef CONFIG_SMP - load32 PA(pa_dbit_lock),\tmp - dbit_unlock0 \spc,\tmp + load32 PA(pa_tlb_lock),\tmp + tlb_unlock0 \spc,\tmp #endif .endm /* Set the _PAGE_ACCESSED bit of the PTE. Be clever and * don't needlessly dirty the cache line if it was already set */ - .macro update_ptep spc,ptep,pte,tmp,tmp1 -#ifdef CONFIG_SMP - or,COND(=) %r0,\spc,%r0 - LDREG 0(\ptep),\pte -#endif + .macro update_accessed ptp,pte,tmp,tmp1 ldi _PAGE_ACCESSED,\tmp1 or \tmp1,\pte,\tmp and,COND(<>) \tmp1,\pte,%r0 - STREG \tmp,0(\ptep) + STREG \tmp,0(\ptp) .endm /* Set the dirty bit (and accessed bit). No need to be * clever, this is only used from the dirty fault */ - .macro update_dirty spc,ptep,pte,tmp -#ifdef CONFIG_SMP - or,COND(=) %r0,\spc,%r0 - LDREG 0(\ptep),\pte -#endif + .macro update_dirty ptp,pte,tmp ldi _PAGE_ACCESSED|_PAGE_DIRTY,\tmp or \tmp,\pte,\pte - STREG \pte,0(\ptep) + STREG \pte,0(\ptp) .endm /* bitshift difference between a PFN (based on kernel's PAGE_SIZE) @@ -1148,14 +1144,14 @@ dtlb_miss_20w: L3_ptep ptp,pte,t0,va,dtlb_check_alias_20w - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_20w + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1174,14 +1170,14 @@ nadtlb_miss_20w: L3_ptep ptp,pte,t0,va,nadtlb_check_alias_20w - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_20w + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1202,20 +1198,20 @@ dtlb_miss_11: L2_ptep ptp,pte,t0,va,dtlb_check_alias_11 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_11 + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 idtlba pte,(%sr1,va) idtlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1235,21 +1231,20 @@ nadtlb_miss_11: L2_ptep ptp,pte,t0,va,nadtlb_check_alias_11 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_11 + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 idtlba pte,(%sr1,va) idtlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1269,16 +1264,16 @@ dtlb_miss_20: L2_ptep ptp,pte,t0,va,dtlb_check_alias_20 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,dtlb_check_alias_20 + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 idtlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1297,16 +1292,16 @@ nadtlb_miss_20: L2_ptep ptp,pte,t0,va,nadtlb_check_alias_20 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,nadtlb_check_alias_20 + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 - idtlbt pte,prot - dbit_unlock1 spc,t0 + idtlbt pte,prot + tlb_unlock1 spc,t0 rfir nop @@ -1406,14 +1401,14 @@ itlb_miss_20w: L3_ptep ptp,pte,t0,va,itlb_fault - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,itlb_fault + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1430,14 +1425,14 @@ naitlb_miss_20w: L3_ptep ptp,pte,t0,va,naitlb_check_alias_20w - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_20w + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1458,20 +1453,20 @@ itlb_miss_11: L2_ptep ptp,pte,t0,va,itlb_fault - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,itlb_fault + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 iitlba pte,(%sr1,va) iitlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1482,20 +1477,20 @@ naitlb_miss_11: L2_ptep ptp,pte,t0,va,naitlb_check_alias_11 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_11 + update_accessed ptp,pte,t0,t1 make_insert_tlb_11 spc,pte,prot - mfsp %sr1,t0 /* Save sr1 so we can use it in tlb inserts */ + mfsp %sr1,t1 /* Save sr1 so we can use it in tlb inserts */ mtsp spc,%sr1 iitlba pte,(%sr1,va) iitlbp prot,(%sr1,va) - mtsp t0, %sr1 /* Restore sr1 */ - dbit_unlock1 spc,t0 + mtsp t1, %sr1 /* Restore sr1 */ + tlb_unlock1 spc,t0 rfir nop @@ -1516,16 +1511,16 @@ itlb_miss_20: L2_ptep ptp,pte,t0,va,itlb_fault - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,itlb_fault + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1536,16 +1531,16 @@ naitlb_miss_20: L2_ptep ptp,pte,t0,va,naitlb_check_alias_20 - dbit_lock spc,t0,t1 - update_ptep spc,ptp,pte,t0,t1 + tlb_lock spc,ptp,pte,t0,t1,naitlb_check_alias_20 + update_accessed ptp,pte,t0,t1 make_insert_tlb spc,pte,prot - f_extend pte,t0 + f_extend pte,t1 iitlbt pte,prot - dbit_unlock1 spc,t0 + tlb_unlock1 spc,t0 rfir nop @@ -1568,14 +1563,14 @@ dbit_trap_20w: L3_ptep ptp,pte,t0,va,dbit_fault - dbit_lock spc,t0,t1 - update_dirty spc,ptp,pte,t1 + tlb_lock spc,ptp,pte,t0,t1,dbit_fault + update_dirty ptp,pte,t1 make_insert_tlb spc,pte,prot idtlbt pte,prot - dbit_unlock0 spc,t0 + tlb_unlock0 spc,t0 rfir nop #else @@ -1588,8 +1583,8 @@ dbit_trap_11: L2_ptep ptp,pte,t0,va,dbit_fault - dbit_lock spc,t0,t1 - update_dirty spc,ptp,pte,t1 + tlb_lock spc,ptp,pte,t0,t1,dbit_fault + update_dirty ptp,pte,t1 make_insert_tlb_11 spc,pte,prot @@ -1600,8 +1595,8 @@ dbit_trap_11: idtlbp prot,(%sr1,va) mtsp t1, %sr1 /* Restore sr1 */ - dbit_unlock0 spc,t0 + tlb_unlock0 spc,t0 rfir nop @@ -1612,16 +1607,16 @@ dbit_trap_20: L2_ptep ptp,pte,t0,va,dbit_fault - dbit_lock spc,t0,t1 - update_dirty spc,ptp,pte,t1 + tlb_lock spc,ptp,pte,t0,t1,dbit_fault + update_dirty ptp,pte,t1 make_insert_tlb spc,pte,prot f_extend pte,t1 - idtlbt pte,prot - dbit_unlock0 spc,t0 + idtlbt pte,prot + tlb_unlock0 spc,t0 rfir nop #endif diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 6548fd1d2e62..b99b39f1da02 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -43,10 +43,6 @@ #include "../math-emu/math-emu.h" /* for handle_fpe() */ -#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) -DEFINE_SPINLOCK(pa_dbit_lock); -#endif - static void parisc_show_stack(struct task_struct *task, unsigned long *sp, struct pt_regs *regs); -- cgit From 892e8cac99a71f6254f84fc662068d912e1943bf Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Fri, 10 Jul 2015 09:40:59 -0400 Subject: selinux: fix mprotect PROT_EXEC regression caused by mm change commit 66fc13039422ba7df2d01a8ee0873e4ef965b50b ("mm: shmem_zero_setup skip security check and lockdep conflict with XFS") caused a regression for SELinux by disabling any SELinux checking of mprotect PROT_EXEC on shared anonymous mappings. However, even before that regression, the checking on such mprotect PROT_EXEC calls was inconsistent with the checking on a mmap PROT_EXEC call for a shared anonymous mapping. On a mmap, the security hook is passed a NULL file and knows it is dealing with an anonymous mapping and therefore applies an execmem check and no file checks. On a mprotect, the security hook is passed a vma with a non-NULL vm_file (as this was set from the internally-created shmem file during mmap) and therefore applies the file-based execute check and no execmem check. Since the aforementioned commit now marks the shmem zero inode with the S_PRIVATE flag, the file checks are disabled and we have no checking at all on mprotect PROT_EXEC. Add a test to the mprotect hook logic for such private inodes, and apply an execmem check in that case. This makes the mmap and mprotect checking consistent for shared anonymous mappings, as well as for /dev/zero and ashmem. Cc: # 4.1.x Signed-off-by: Stephen Smalley Signed-off-by: Paul Moore --- security/selinux/hooks.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 312537d48050..692e3cc8ce23 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3271,7 +3271,8 @@ static int file_map_prot_check(struct file *file, unsigned long prot, int shared int rc = 0; if (default_noexec && - (prot & PROT_EXEC) && (!file || (!shared && (prot & PROT_WRITE)))) { + (prot & PROT_EXEC) && (!file || IS_PRIVATE(file_inode(file)) || + (!shared && (prot & PROT_WRITE)))) { /* * We are making executable an anonymous mapping or a * private file mapping that will also be writable. -- cgit From c4d029f2d43b39de7b9299e8b58102a442ba86f8 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 11 Jul 2015 14:26:34 +0200 Subject: tick/broadcast: Prevent NULL pointer dereference Dan reported that the recent changes to the broadcast code introduced a potential NULL dereference. Add the proper check. Fixes: e0454311903d "tick/broadcast: Sanity check the shutdown of the local clock_event" Reported-by: Dan Carpenter Signed-off-by: Thomas Gleixner --- kernel/time/tick-broadcast.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index ee3cf942d6eb..52b9e199b5ac 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -409,14 +409,16 @@ void tick_broadcast_control(enum tick_broadcast_mode mode) break; } - if (cpumask_empty(tick_broadcast_mask)) { - if (!bc_stopped) - clockevents_shutdown(bc); - } else if (bc_stopped) { - if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) - tick_broadcast_start_periodic(bc); - else - tick_broadcast_setup_oneshot(bc); + if (bc) { + if (cpumask_empty(tick_broadcast_mask)) { + if (!bc_stopped) + clockevents_shutdown(bc); + } else if (bc_stopped) { + if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) + tick_broadcast_start_periodic(bc); + else + tick_broadcast_setup_oneshot(bc); + } } raw_spin_unlock(&tick_broadcast_lock); } -- cgit From 0a73d0a204a4a04a1e110539c5a524ae51f91d6d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Jul 2015 10:34:29 -0400 Subject: 9p: don't leave a half-initialized inode sitting around Cc: stable@vger.kernel.org # all branches Signed-off-by: Al Viro --- fs/9p/vfs_inode.c | 3 +-- fs/9p/vfs_inode_dotl.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 510040b04c96..b1dc51888048 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -540,8 +540,7 @@ static struct inode *v9fs_qid_iget(struct super_block *sb, unlock_new_inode(inode); return inode; error: - unlock_new_inode(inode); - iput(inode); + iget_failed(inode); return ERR_PTR(retval); } diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 09e4433717b8..e8aa57dc8d6d 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -149,8 +149,7 @@ static struct inode *v9fs_qid_iget_dotl(struct super_block *sb, unlock_new_inode(inode); return inode; error: - unlock_new_inode(inode); - iput(inode); + iget_failed(inode); return ERR_PTR(retval); } -- cgit From 9391dd00d13c853ab4f2a85435288ae2202e0e43 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 12 Jul 2015 10:39:45 -0400 Subject: fix a braino in ovl_d_select_inode() when opening a directory we want the overlayfs inode, not one from the topmost layer. Reported-By: Andrey Jr. Melnikov Tested-By: Andrey Jr. Melnikov Signed-off-by: Al Viro --- fs/overlayfs/inode.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index f140e3dbfb7b..d9da5a4e9382 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -343,6 +343,9 @@ struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags) struct path realpath; enum ovl_path_type type; + if (d_is_dir(dentry)) + return d_backing_inode(dentry); + type = ovl_path_real(dentry, &realpath); if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) { err = ovl_want_write(dentry); -- cgit From 75a6f82a0d10ef8f13cd8fe7212911a0252ab99e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 8 Jul 2015 02:42:38 +0100 Subject: freeing unlinked file indefinitely delayed Normally opening a file, unlinking it and then closing will have the inode freed upon close() (provided that it's not otherwise busy and has no remaining links, of course). However, there's one case where that does *not* happen. Namely, if you open it by fhandle with cold dcache, then unlink() and close(). In normal case you get d_delete() in unlink(2) notice that dentry is busy and unhash it; on the final dput() it will be forcibly evicted from dcache, triggering iput() and inode removal. In this case, though, we end up with *two* dentries - disconnected (created by open-by-fhandle) and regular one (used by unlink()). The latter will have its reference to inode dropped just fine, but the former will not - it's considered hashed (it is on the ->s_anon list), so it will stay around until the memory pressure will finally do it in. As the result, we have the final iput() delayed indefinitely. It's trivial to reproduce - void flush_dcache(void) { system("mount -o remount,rw /"); } static char buf[20 * 1024 * 1024]; main() { int fd; union { struct file_handle f; char buf[MAX_HANDLE_SZ]; } x; int m; x.f.handle_bytes = sizeof(x); chdir("/root"); mkdir("foo", 0700); fd = open("foo/bar", O_CREAT | O_RDWR, 0600); close(fd); name_to_handle_at(AT_FDCWD, "foo/bar", &x.f, &m, 0); flush_dcache(); fd = open_by_handle_at(AT_FDCWD, &x.f, O_RDWR); unlink("foo/bar"); write(fd, buf, sizeof(buf)); system("df ."); /* 20Mb eaten */ close(fd); system("df ."); /* should've freed those 20Mb */ flush_dcache(); system("df ."); /* should be the same as #2 */ } will spit out something like Filesystem 1K-blocks Used Available Use% Mounted on /dev/root 322023 303843 1131 100% / Filesystem 1K-blocks Used Available Use% Mounted on /dev/root 322023 303843 1131 100% / Filesystem 1K-blocks Used Available Use% Mounted on /dev/root 322023 283282 21692 93% / - inode gets freed only when dentry is finally evicted (here we trigger than by remount; normally it would've happened in response to memory pressure hell knows when). Cc: stable@vger.kernel.org # v2.6.38+; earlier ones need s/kill_it/unhash_it/ Acked-by: J. Bruce Fields Signed-off-by: Al Viro --- fs/dcache.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 7a3f3e5f9cea..5c8ea15e73a5 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -642,7 +642,7 @@ static inline bool fast_dput(struct dentry *dentry) /* * If we have a d_op->d_delete() operation, we sould not - * let the dentry count go to zero, so use "put__or_lock". + * let the dentry count go to zero, so use "put_or_lock". */ if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) return lockref_put_or_lock(&dentry->d_lockref); @@ -697,7 +697,7 @@ static inline bool fast_dput(struct dentry *dentry) */ smp_rmb(); d_flags = ACCESS_ONCE(dentry->d_flags); - d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST; + d_flags &= DCACHE_REFERENCED | DCACHE_LRU_LIST | DCACHE_DISCONNECTED; /* Nothing to do? Dropping the reference was all we needed? */ if (d_flags == (DCACHE_REFERENCED | DCACHE_LRU_LIST) && !d_unhashed(dentry)) @@ -776,6 +776,9 @@ repeat: if (unlikely(d_unhashed(dentry))) goto kill_it; + if (unlikely(dentry->d_flags & DCACHE_DISCONNECTED)) + goto kill_it; + if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) { if (dentry->d_op->d_delete(dentry)) goto kill_it; -- cgit From 01e2d0627a9a6edb24c37db45db5ecb31e9de808 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Jul 2015 15:00:20 -0700 Subject: Revert "drm/i915: Use crtc_state->active in primary check_plane func" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit dec4f799d0a4c9edae20512fa60b0a36f3299ca2. Jörg Otte reports a NULL pointder dereference due to this commit, as 'crtc_state' very much can be NULL: crtc_state = state->base.state ? intel_atomic_get_crtc_state(state->base.state, intel_crtc) : NULL; So the change to test 'crtc_state->base.active' cannot possibly be correct as-is. There may be some other minimal fix (like just checking crtc_state for NULL), but I'm just reverting it now for the rc2 release, and people like Daniel Vetter who actually know this code will figure out what the right solution is in the longer term. Reported-and-bisected-by: Jörg Otte Cc: Ander Conselvan de Oliveira Cc: Jani Nikula Cc: Daniel Vetter CC: Maarten Lankhorst Signed-off-by: Linus Torvalds --- drivers/gpu/drm/i915/intel_display.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index ba9321998a41..647b1404c441 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -13276,7 +13276,7 @@ intel_check_primary_plane(struct drm_plane *plane, if (ret) return ret; - if (crtc_state->base.active) { + if (intel_crtc->active) { struct intel_plane_state *old_state = to_intel_plane_state(plane->state); -- cgit From bc0195aad0daa2ad5b0d76cce22b167bc3435590 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Jul 2015 15:10:30 -0700 Subject: Linux 4.2-rc2 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 13270c0a9336..257ef5892ab7 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 4 PATCHLEVEL = 2 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc2 NAME = Hurr durr I'ma sheep # *DOCUMENTATION* -- cgit From df9de3c4295afa45d9c58c3d9039098af5765a21 Mon Sep 17 00:00:00 2001 From: Vitaly Andrianov Date: Mon, 6 Jul 2015 16:43:18 +0100 Subject: ARM: 8400/1: use virt_to_idmap to get phys_reset address This patch is to get correct physical address of the reset function for PAE systems, which use aliased physical memory for booting. See the "ARM: mm: Introduce virt_to_idmap() with an arch hook" for details. Signed-off-by: Vitaly Andrianov Signed-off-by: Russell King --- arch/arm/kernel/reboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/reboot.c b/arch/arm/kernel/reboot.c index 1a4d232796be..38269358fd25 100644 --- a/arch/arm/kernel/reboot.c +++ b/arch/arm/kernel/reboot.c @@ -50,7 +50,7 @@ static void __soft_restart(void *addr) flush_cache_all(); /* Switch to the identity mapping. */ - phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset); + phys_reset = (phys_reset_t)(unsigned long)virt_to_idmap(cpu_reset); phys_reset((unsigned long)addr); /* Should never get here. */ -- cgit From 8e0c34b0d268f10d84cb3f79f59c140d545a553a Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Tue, 7 Jul 2015 18:17:05 +0100 Subject: ARM: 8402/1: perf: Don't use of_node after putting it It's possible, albeit unlikely, that using the of_node here will reference freed memory. Call of_node_put() after printing the name to be safe. Signed-off-by: Stephen Boyd Signed-off-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/perf_event.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 357f57ea83f4..54272e0be713 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -818,12 +818,13 @@ static int of_pmu_irq_cfg(struct arm_pmu *pmu) if (arch_find_n_match_cpu_physical_id(dn, cpu, NULL)) break; - of_node_put(dn); if (cpu >= nr_cpu_ids) { pr_warn("Failed to find logical CPU for %s\n", dn->name); + of_node_put(dn); break; } + of_node_put(dn); irqs[i] = cpu; cpumask_set_cpu(cpu, &pmu->supported_cpus); -- cgit From 462859aa7bbe1ac83ec4377a0a06fe60778f3f27 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 8 Jul 2015 13:21:55 +0100 Subject: ARM: 8404/1: dma-mapping: fix off-by-one error in bitmap size check nr_bitmaps member of mapping structure stores the number of already allocated bitmaps and it is interpreted as loop iterator (it starts from 0 not from 1), so a comparison against number of possible bitmap extensions should include this fact. This patch fixes this by changing the extension failure condition. This issue has been introduced by commit 4d852ef8c2544ce21ae41414099a7504c61164a0 ("arm: dma-mapping: Add support to extend DMA IOMMU mappings"). Reported-by: Hyungwon Hwang Signed-off-by: Marek Szyprowski Reviewed-by: Hyungwon Hwang Cc: stable@vger.kernel.org # v3.15+ Signed-off-by: Russell King --- arch/arm/mm/dma-mapping.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 1ced8a0f7a52..cba12f34ff77 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -1971,7 +1971,7 @@ static int extend_iommu_mapping(struct dma_iommu_mapping *mapping) { int next_bitmap; - if (mapping->nr_bitmaps > mapping->extensions) + if (mapping->nr_bitmaps >= mapping->extensions) return -EINVAL; next_bitmap = mapping->nr_bitmaps; -- cgit From bac51ad9d14f6baed3730ef53bedc1eb2238563a Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 9 Jul 2015 00:30:24 +0100 Subject: ARM: invalidate L1 before enabling coherency We must invalidate the L1 cache before enabling coherency, otherwise secondary CPUs can inject invalid cache lines into the coherent CPU cluster, which could then be migrated to other CPUs. This fixes a recent regression with SoCFPGA randomly failing to boot. Fixes: 02b4e2756e01 ("ARM: v7 setup function should invalidate L1 cache") Signed-off-by: Russell King --- arch/arm/mm/proc-v7.S | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 0716bbe19872..de2b246fed38 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -274,7 +274,10 @@ __v7_ca15mp_setup: __v7_b15mp_setup: __v7_ca17mp_setup: mov r10, #0 -1: +1: adr r12, __v7_setup_stack @ the local stack + stmia r12, {r0-r5, lr} @ v7_invalidate_l1 touches r0-r6 + bl v7_invalidate_l1 + ldmia r12, {r0-r5, lr} #ifdef CONFIG_SMP ALT_SMP(mrc p15, 0, r0, c1, c0, 1) ALT_UP(mov r0, #(1 << 6)) @ fake it for UP @@ -283,7 +286,7 @@ __v7_ca17mp_setup: orreq r0, r0, r10 @ Enable CPU-specific SMP bits mcreq p15, 0, r0, c1, c0, 1 #endif - b __v7_setup + b __v7_setup_cont /* * Errata: @@ -413,10 +416,11 @@ __v7_pj4b_setup: __v7_setup: adr r12, __v7_setup_stack @ the local stack - stmia r12, {r0-r5, r7, r9, r11, lr} + stmia r12, {r0-r5, lr} @ v7_invalidate_l1 touches r0-r6 bl v7_invalidate_l1 - ldmia r12, {r0-r5, r7, r9, r11, lr} + ldmia r12, {r0-r5, lr} +__v7_setup_cont: and r0, r9, #0xff000000 @ ARM? teq r0, #0x41000000 bne __errata_finish @@ -480,7 +484,7 @@ ENDPROC(__v7_setup) .align 2 __v7_setup_stack: - .space 4 * 11 @ 11 registers + .space 4 * 7 @ 12 registers __INITDATA -- cgit From 0871b7248113ebfccbfabcd3fd1f867a2bc681f4 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 17 Jul 2015 10:33:04 +0100 Subject: ARM: fix __virt_to_idmap build error on !MMU Fengguang Wu reports that building ARM with !MMU results in the following build error: arch/arm/kernel/built-in.o: In function `__soft_restart': >> :(.text+0x1624): undefined reference to `arch_virt_to_idmap' Fix this by adding an appropriate IS_ENABLED(CONFIG_MMU) into the __virt_to_idmap() inline function. Signed-off-by: Russell King --- arch/arm/include/asm/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/memory.h b/arch/arm/include/asm/memory.h index 6f225acc07c5..b7f6fb462ea0 100644 --- a/arch/arm/include/asm/memory.h +++ b/arch/arm/include/asm/memory.h @@ -286,7 +286,7 @@ extern phys_addr_t (*arch_virt_to_idmap)(unsigned long x); */ static inline phys_addr_t __virt_to_idmap(unsigned long x) { - if (arch_virt_to_idmap) + if (IS_ENABLED(CONFIG_MMU) && arch_virt_to_idmap) return arch_virt_to_idmap(x); else return __virt_to_phys(x); -- cgit From f81309067ff2d84788316c513a415f6bb8c9171f Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 1 Jun 2015 23:44:46 +0100 Subject: ARM: move heavy barrier support out of line The existing memory barrier macro causes a significant amount of code to be inserted inline at every call site. For example, in gpio_set_irq_type(), we have this for mb(): c0344c08: f57ff04e dsb st c0344c0c: e59f8190 ldr r8, [pc, #400] ; c0344da4 c0344c10: e3590004 cmp r9, #4 c0344c14: e5983014 ldr r3, [r8, #20] c0344c18: 0a000054 beq c0344d70 c0344c1c: e3530000 cmp r3, #0 c0344c20: 0a000004 beq c0344c38 c0344c24: e50b2030 str r2, [fp, #-48] ; 0xffffffd0 c0344c28: e50bc034 str ip, [fp, #-52] ; 0xffffffcc c0344c2c: e12fff33 blx r3 c0344c30: e51bc034 ldr ip, [fp, #-52] ; 0xffffffcc c0344c34: e51b2030 ldr r2, [fp, #-48] ; 0xffffffd0 c0344c38: e5963004 ldr r3, [r6, #4] Moving the outer_cache_sync() call out of line reduces the impact of the barrier: c0344968: f57ff04e dsb st c034496c: e35a0004 cmp sl, #4 c0344970: e50b2030 str r2, [fp, #-48] ; 0xffffffd0 c0344974: 0a000044 beq c0344a8c c0344978: ebf363dd bl c001d8f4 c034497c: e5953004 ldr r3, [r5, #4] This should reduce the cache footprint of this code. Overall, this results in a reduction of around 20K in the kernel size: text data bss dec hex filename 10773970 667392 10369656 21811018 14ccf4a ../build/imx6/vmlinux-old 10754219 667392 10369656 21791267 14c8223 ../build/imx6/vmlinux-new Another advantage to this approach is that we can finally resolve the issue of SoCs which have their own memory barrier requirements within multiplatform kernels (such as OMAP.) Here, the bus interconnects need additional handling to ensure that writes become visible in the correct order (eg, between dma_map() operations, writes to DMA coherent memory, and MMIO accesses.) Acked-by: Tony Lindgren Acked-by: Richard Woodruff Signed-off-by: Russell King --- arch/arm/include/asm/barrier.h | 12 +++++++++--- arch/arm/include/asm/outercache.h | 17 ----------------- arch/arm/kernel/irq.c | 1 + arch/arm/mach-mmp/pm-pxa910.c | 1 + arch/arm/mach-prima2/pm.c | 1 + arch/arm/mach-ux500/cache-l2x0.c | 1 + arch/arm/mm/Kconfig | 4 ++++ arch/arm/mm/flush.c | 11 +++++++++++ 8 files changed, 28 insertions(+), 20 deletions(-) diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index 6c2327e1c732..fea99b0e2087 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -2,7 +2,6 @@ #define __ASM_BARRIER_H #ifndef __ASSEMBLY__ -#include #define nop() __asm__ __volatile__("mov\tr0,r0\t@ nop\n\t"); @@ -37,12 +36,19 @@ #define dmb(x) __asm__ __volatile__ ("" : : : "memory") #endif +#ifdef CONFIG_ARM_HEAVY_MB +extern void arm_heavy_mb(void); +#define __arm_heavy_mb(x...) do { dsb(x); arm_heavy_mb(); } while (0) +#else +#define __arm_heavy_mb(x...) dsb(x) +#endif + #ifdef CONFIG_ARCH_HAS_BARRIERS #include #elif defined(CONFIG_ARM_DMA_MEM_BUFFERABLE) || defined(CONFIG_SMP) -#define mb() do { dsb(); outer_sync(); } while (0) +#define mb() __arm_heavy_mb() #define rmb() dsb() -#define wmb() do { dsb(st); outer_sync(); } while (0) +#define wmb() __arm_heavy_mb(st) #define dma_rmb() dmb(osh) #define dma_wmb() dmb(oshst) #else diff --git a/arch/arm/include/asm/outercache.h b/arch/arm/include/asm/outercache.h index 563b92fc2f41..c2bf24f40177 100644 --- a/arch/arm/include/asm/outercache.h +++ b/arch/arm/include/asm/outercache.h @@ -129,21 +129,4 @@ static inline void outer_resume(void) { } #endif -#ifdef CONFIG_OUTER_CACHE_SYNC -/** - * outer_sync - perform a sync point for outer cache - * - * Ensure that all outer cache operations are complete and any store - * buffers are drained. - */ -static inline void outer_sync(void) -{ - if (outer_cache.sync) - outer_cache.sync(); -} -#else -static inline void outer_sync(void) -{ } -#endif - #endif /* __ASM_OUTERCACHE_H */ diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 350f188c92d2..b96c8ed1723a 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -39,6 +39,7 @@ #include #include +#include #include #include #include diff --git a/arch/arm/mach-mmp/pm-pxa910.c b/arch/arm/mach-mmp/pm-pxa910.c index 04c9daf9f8d7..7db5870d127f 100644 --- a/arch/arm/mach-mmp/pm-pxa910.c +++ b/arch/arm/mach-mmp/pm-pxa910.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/mach-prima2/pm.c b/arch/arm/mach-prima2/pm.c index d99d08eeb966..83e94c95e314 100644 --- a/arch/arm/mach-prima2/pm.c +++ b/arch/arm/mach-prima2/pm.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/mach-ux500/cache-l2x0.c b/arch/arm/mach-ux500/cache-l2x0.c index 7557bede7ae6..780bd13cd7e3 100644 --- a/arch/arm/mach-ux500/cache-l2x0.c +++ b/arch/arm/mach-ux500/cache-l2x0.c @@ -8,6 +8,7 @@ #include #include +#include #include #include "db8500-regs.h" diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 7c6b976ab8d3..df7537f12469 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -883,6 +883,7 @@ config OUTER_CACHE config OUTER_CACHE_SYNC bool + select ARM_HEAVY_MB help The outer cache has a outer_cache_fns.sync function pointer that can be used to drain the write buffer of the outer cache. @@ -1031,6 +1032,9 @@ config ARCH_HAS_BARRIERS This option allows the use of custom mandatory barriers included via the mach/barriers.h file. +config ARM_HEAVY_MB + bool + config ARCH_SUPPORTS_BIG_ENDIAN bool help diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index 34b66af516ea..ce6c2960d5ac 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -21,6 +21,17 @@ #include "mm.h" +#ifdef CONFIG_ARM_HEAVY_MB +void arm_heavy_mb(void) +{ +#ifdef CONFIG_OUTER_CACHE_SYNC + if (outer_cache.sync) + outer_cache.sync(); +#endif +} +EXPORT_SYMBOL(arm_heavy_mb); +#endif + #ifdef CONFIG_CPU_CACHE_VIPT static void flush_pfn_alias(unsigned long pfn, unsigned long vaddr) -- cgit From 4e1f8a6f1d978f033f1751e2887b3a69fab3f878 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 3 Jun 2015 13:10:16 +0100 Subject: ARM: add soc memory barrier extension Add an extension to the heavy barrier code to allow a SoC specific memory barrier function to be provided. This is needed for platforms where the interconnect has weak ordering, and thus needs assistance to ensure that memory writes are properly visible in the correct order to other parts of the system. Acked-by: Tony Lindgren Acked-by: Richard Woodruff Signed-off-by: Russell King --- arch/arm/include/asm/barrier.h | 1 + arch/arm/mm/flush.c | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/arch/arm/include/asm/barrier.h b/arch/arm/include/asm/barrier.h index fea99b0e2087..3d8f1d3ad9a7 100644 --- a/arch/arm/include/asm/barrier.h +++ b/arch/arm/include/asm/barrier.h @@ -37,6 +37,7 @@ #endif #ifdef CONFIG_ARM_HEAVY_MB +extern void (*soc_mb)(void); extern void arm_heavy_mb(void); #define __arm_heavy_mb(x...) do { dsb(x); arm_heavy_mb(); } while (0) #else diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c index ce6c2960d5ac..1ec8e7590fc6 100644 --- a/arch/arm/mm/flush.c +++ b/arch/arm/mm/flush.c @@ -22,12 +22,16 @@ #include "mm.h" #ifdef CONFIG_ARM_HEAVY_MB +void (*soc_mb)(void); + void arm_heavy_mb(void) { #ifdef CONFIG_OUTER_CACHE_SYNC if (outer_cache.sync) outer_cache.sync(); #endif + if (soc_mb) + soc_mb(); } EXPORT_SYMBOL(arm_heavy_mb); #endif -- cgit From f746929ffdc8a83c0e6092343d4475f6485e13d3 Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 6 Jun 2015 00:13:40 +0100 Subject: Revert "ARM: OMAP4: remove dead kconfig option OMAP4_ERRATA_I688" This reverts commit 606da4826b89b044b51e3a84958b802204cfe4c7. We actually need this code for proper behaviour of OMAP4, and it needs fixing a different way other than just removing the code. Disabling code which is necessary in the hopes of persuing multiplatform kernels is a stupid approach. Acked-by: Tony Lindgren Acked-by: Richard Woodruff Signed-off-by: Russell King --- arch/arm/mach-omap2/Kconfig | 21 ++++++++++++ arch/arm/mach-omap2/common.c | 1 + arch/arm/mach-omap2/common.h | 3 ++ arch/arm/mach-omap2/io.c | 2 ++ arch/arm/mach-omap2/omap-secure.h | 7 ++++ arch/arm/mach-omap2/omap4-common.c | 69 ++++++++++++++++++++++++++++++++++++++ arch/arm/mach-omap2/sleep44xx.S | 2 ++ 7 files changed, 105 insertions(+) diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index ecc04ff13e95..2128441430ad 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -240,6 +240,27 @@ config OMAP3_SDRC_AC_TIMING wish to say no. Selecting yes without understanding what is going on could result in system crashes; +config OMAP4_ERRATA_I688 + bool "OMAP4 errata: Async Bridge Corruption" + depends on (ARCH_OMAP4 || SOC_OMAP5) && !ARCH_MULTIPLATFORM + select ARCH_HAS_BARRIERS + help + If a data is stalled inside asynchronous bridge because of back + pressure, it may be accepted multiple times, creating pointer + misalignment that will corrupt next transfers on that data path + until next reset of the system (No recovery procedure once the + issue is hit, the path remains consistently broken). Async bridge + can be found on path between MPU to EMIF and MPU to L3 interconnect. + This situation can happen only when the idle is initiated by a + Master Request Disconnection (which is trigged by software when + executing WFI on CPU). + The work-around for this errata needs all the initiators connected + through async bridge must ensure that data path is properly drained + before issuing WFI. This condition will be met if one Strongly ordered + access is performed to the target right before executing the WFI. + In MPU case, L3 T2ASYNC FIFO and DDR T2ASYNC FIFO needs to be drained. + IO barrier ensure that there is no synchronisation loss on initiators + operating on both interconnect port simultaneously. endmenu endif diff --git a/arch/arm/mach-omap2/common.c b/arch/arm/mach-omap2/common.c index eae6a0e87c90..484cdadfb187 100644 --- a/arch/arm/mach-omap2/common.c +++ b/arch/arm/mach-omap2/common.c @@ -30,4 +30,5 @@ int __weak omap_secure_ram_reserve_memblock(void) void __init omap_reserve(void) { omap_secure_ram_reserve_memblock(); + omap_barrier_reserve_memblock(); } diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h index cf3cf22ecd42..46e24581d624 100644 --- a/arch/arm/mach-omap2/common.h +++ b/arch/arm/mach-omap2/common.h @@ -200,6 +200,9 @@ void __init omap4_map_io(void); void __init omap5_map_io(void); void __init ti81xx_map_io(void); +/* omap_barriers_init() is OMAP4 only */ +void omap_barriers_init(void); + /** * omap_test_timeout - busy-loop, testing a condition * @cond: condition to test until it evaluates to true diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c index 820dde8b5b04..7743e3672f98 100644 --- a/arch/arm/mach-omap2/io.c +++ b/arch/arm/mach-omap2/io.c @@ -306,6 +306,7 @@ void __init am33xx_map_io(void) void __init omap4_map_io(void) { iotable_init(omap44xx_io_desc, ARRAY_SIZE(omap44xx_io_desc)); + omap_barriers_init(); } #endif @@ -313,6 +314,7 @@ void __init omap4_map_io(void) void __init omap5_map_io(void) { iotable_init(omap54xx_io_desc, ARRAY_SIZE(omap54xx_io_desc)); + omap_barriers_init(); } #endif /* diff --git a/arch/arm/mach-omap2/omap-secure.h b/arch/arm/mach-omap2/omap-secure.h index af2851fbcdf0..dec2b05d184b 100644 --- a/arch/arm/mach-omap2/omap-secure.h +++ b/arch/arm/mach-omap2/omap-secure.h @@ -70,6 +70,13 @@ extern u32 rx51_secure_dispatcher(u32 idx, u32 process, u32 flag, u32 nargs, extern u32 rx51_secure_update_aux_cr(u32 set_bits, u32 clear_bits); extern u32 rx51_secure_rng_call(u32 ptr, u32 count, u32 flag); +#ifdef CONFIG_OMAP4_ERRATA_I688 +extern int omap_barrier_reserve_memblock(void); +#else +static inline void omap_barrier_reserve_memblock(void) +{ } +#endif + #ifdef CONFIG_SOC_HAS_REALTIME_COUNTER void set_cntfreq(void); #else diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c index 16350eefa66c..7bb116a6f86f 100644 --- a/arch/arm/mach-omap2/omap4-common.c +++ b/arch/arm/mach-omap2/omap4-common.c @@ -51,6 +51,75 @@ static void __iomem *twd_base; #define IRQ_LOCALTIMER 29 +#ifdef CONFIG_OMAP4_ERRATA_I688 +/* Used to implement memory barrier on DRAM path */ +#define OMAP4_DRAM_BARRIER_VA 0xfe600000 + +void __iomem *dram_sync, *sram_sync; + +static phys_addr_t paddr; +static u32 size; + +void omap_bus_sync(void) +{ + if (dram_sync && sram_sync) { + writel_relaxed(readl_relaxed(dram_sync), dram_sync); + writel_relaxed(readl_relaxed(sram_sync), sram_sync); + isb(); + } +} +EXPORT_SYMBOL(omap_bus_sync); + +static int __init omap4_sram_init(void) +{ + struct device_node *np; + struct gen_pool *sram_pool; + + np = of_find_compatible_node(NULL, NULL, "ti,omap4-mpu"); + if (!np) + pr_warn("%s:Unable to allocate sram needed to handle errata I688\n", + __func__); + sram_pool = of_get_named_gen_pool(np, "sram", 0); + if (!sram_pool) + pr_warn("%s:Unable to get sram pool needed to handle errata I688\n", + __func__); + else + sram_sync = (void *)gen_pool_alloc(sram_pool, PAGE_SIZE); + + return 0; +} +omap_arch_initcall(omap4_sram_init); + +/* Steal one page physical memory for barrier implementation */ +int __init omap_barrier_reserve_memblock(void) +{ + + size = ALIGN(PAGE_SIZE, SZ_1M); + paddr = arm_memblock_steal(size, SZ_1M); + + return 0; +} + +void __init omap_barriers_init(void) +{ + struct map_desc dram_io_desc[1]; + + dram_io_desc[0].virtual = OMAP4_DRAM_BARRIER_VA; + dram_io_desc[0].pfn = __phys_to_pfn(paddr); + dram_io_desc[0].length = size; + dram_io_desc[0].type = MT_MEMORY_RW_SO; + iotable_init(dram_io_desc, ARRAY_SIZE(dram_io_desc)); + dram_sync = (void __iomem *) dram_io_desc[0].virtual; + + pr_info("OMAP4: Map 0x%08llx to 0x%08lx for dram barrier\n", + (long long) paddr, dram_io_desc[0].virtual); + +} +#else +void __init omap_barriers_init(void) +{} +#endif + void gic_dist_disable(void) { if (gic_dist_base_addr) diff --git a/arch/arm/mach-omap2/sleep44xx.S b/arch/arm/mach-omap2/sleep44xx.S index ad1bb9431e94..b84a0122d823 100644 --- a/arch/arm/mach-omap2/sleep44xx.S +++ b/arch/arm/mach-omap2/sleep44xx.S @@ -333,9 +333,11 @@ ENDPROC(omap4_cpu_resume) #endif /* defined(CONFIG_SMP) && defined(CONFIG_PM) */ +#ifndef CONFIG_OMAP4_ERRATA_I688 ENTRY(omap_bus_sync) ret lr ENDPROC(omap_bus_sync) +#endif ENTRY(omap_do_wfi) stmfd sp!, {lr} -- cgit From 3fa609755c11fbe8770ede4d895ebb86fb7b9f1e Mon Sep 17 00:00:00 2001 From: Russell King Date: Sat, 6 Jun 2015 00:38:08 +0100 Subject: ARM: omap2: restore OMAP4 barrier behaviour Restore the OMAP4 barrier behaviour using the new implementation which allows multiplatform systems to hook into the mb() and wmb() ARM implementations to perform any necessary additional barrier maintanence. Acked-by: Tony Lindgren Acked-by: Richard Woodruff Signed-off-by: Russell King --- arch/arm/mach-omap2/Kconfig | 28 +++------ arch/arm/mach-omap2/common.h | 12 +++- arch/arm/mach-omap2/include/mach/barriers.h | 33 ----------- arch/arm/mach-omap2/omap-secure.h | 7 --- arch/arm/mach-omap2/omap4-common.c | 92 ++++++++++++++++++++++------- arch/arm/mach-omap2/sleep44xx.S | 10 +--- 6 files changed, 91 insertions(+), 91 deletions(-) delete mode 100644 arch/arm/mach-omap2/include/mach/barriers.h diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig index 2128441430ad..8427997e09c4 100644 --- a/arch/arm/mach-omap2/Kconfig +++ b/arch/arm/mach-omap2/Kconfig @@ -29,6 +29,7 @@ config ARCH_OMAP4 select HAVE_ARM_SCU if SMP select HAVE_ARM_TWD if SMP select OMAP_INTERCONNECT + select OMAP_INTERCONNECT_BARRIER select PL310_ERRATA_588369 if CACHE_L2X0 select PL310_ERRATA_727915 if CACHE_L2X0 select PM_OPP if PM @@ -46,6 +47,7 @@ config SOC_OMAP5 select HAVE_ARM_TWD if SMP select HAVE_ARM_ARCH_TIMER select ARM_ERRATA_798181 if SMP + select OMAP_INTERCONNECT_BARRIER config SOC_AM33XX bool "TI AM33XX" @@ -70,6 +72,7 @@ config SOC_DRA7XX select HAVE_ARM_ARCH_TIMER select IRQ_CROSSBAR select ARM_ERRATA_798181 if SMP + select OMAP_INTERCONNECT_BARRIER config ARCH_OMAP2PLUS bool @@ -91,6 +94,10 @@ config ARCH_OMAP2PLUS help Systems based on OMAP2, OMAP3, OMAP4 or OMAP5 +config OMAP_INTERCONNECT_BARRIER + bool + select ARM_HEAVY_MB + if ARCH_OMAP2PLUS @@ -240,27 +247,6 @@ config OMAP3_SDRC_AC_TIMING wish to say no. Selecting yes without understanding what is going on could result in system crashes; -config OMAP4_ERRATA_I688 - bool "OMAP4 errata: Async Bridge Corruption" - depends on (ARCH_OMAP4 || SOC_OMAP5) && !ARCH_MULTIPLATFORM - select ARCH_HAS_BARRIERS - help - If a data is stalled inside asynchronous bridge because of back - pressure, it may be accepted multiple times, creating pointer - misalignment that will corrupt next transfers on that data path - until next reset of the system (No recovery procedure once the - issue is hit, the path remains consistently broken). Async bridge - can be found on path between MPU to EMIF and MPU to L3 interconnect. - This situation can happen only when the idle is initiated by a - Master Request Disconnection (which is trigged by software when - executing WFI on CPU). - The work-around for this errata needs all the initiators connected - through async bridge must ensure that data path is properly drained - before issuing WFI. This condition will be met if one Strongly ordered - access is performed to the target right before executing the WFI. - In MPU case, L3 T2ASYNC FIFO and DDR T2ASYNC FIFO needs to be drained. - IO barrier ensure that there is no synchronisation loss on initiators - operating on both interconnect port simultaneously. endmenu endif diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h index 46e24581d624..82f88b4ec15f 100644 --- a/arch/arm/mach-omap2/common.h +++ b/arch/arm/mach-omap2/common.h @@ -189,6 +189,15 @@ static inline void omap44xx_restart(enum reboot_mode mode, const char *cmd) } #endif +#ifdef CONFIG_OMAP_INTERCONNECT_BARRIER +void omap_barrier_reserve_memblock(void); +void omap_barriers_init(void); +#else +static inline void omap_barrier_reserve_memblock(void) +{ +} +#endif + /* This gets called from mach-omap2/io.c, do not call this */ void __init omap2_set_globals_tap(u32 class, void __iomem *tap); @@ -200,9 +209,6 @@ void __init omap4_map_io(void); void __init omap5_map_io(void); void __init ti81xx_map_io(void); -/* omap_barriers_init() is OMAP4 only */ -void omap_barriers_init(void); - /** * omap_test_timeout - busy-loop, testing a condition * @cond: condition to test until it evaluates to true diff --git a/arch/arm/mach-omap2/include/mach/barriers.h b/arch/arm/mach-omap2/include/mach/barriers.h deleted file mode 100644 index 1c582a8592b9..000000000000 --- a/arch/arm/mach-omap2/include/mach/barriers.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * OMAP memory barrier header. - * - * Copyright (C) 2011 Texas Instruments, Inc. - * Santosh Shilimkar - * Richard Woodruff - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef __MACH_BARRIERS_H -#define __MACH_BARRIERS_H - -#include - -extern void omap_bus_sync(void); - -#define rmb() dsb() -#define wmb() do { dsb(); outer_sync(); omap_bus_sync(); } while (0) -#define mb() wmb() - -#endif /* __MACH_BARRIERS_H */ diff --git a/arch/arm/mach-omap2/omap-secure.h b/arch/arm/mach-omap2/omap-secure.h index dec2b05d184b..af2851fbcdf0 100644 --- a/arch/arm/mach-omap2/omap-secure.h +++ b/arch/arm/mach-omap2/omap-secure.h @@ -70,13 +70,6 @@ extern u32 rx51_secure_dispatcher(u32 idx, u32 process, u32 flag, u32 nargs, extern u32 rx51_secure_update_aux_cr(u32 set_bits, u32 clear_bits); extern u32 rx51_secure_rng_call(u32 ptr, u32 count, u32 flag); -#ifdef CONFIG_OMAP4_ERRATA_I688 -extern int omap_barrier_reserve_memblock(void); -#else -static inline void omap_barrier_reserve_memblock(void) -{ } -#endif - #ifdef CONFIG_SOC_HAS_REALTIME_COUNTER void set_cntfreq(void); #else diff --git a/arch/arm/mach-omap2/omap4-common.c b/arch/arm/mach-omap2/omap4-common.c index 7bb116a6f86f..949696b6f17b 100644 --- a/arch/arm/mach-omap2/omap4-common.c +++ b/arch/arm/mach-omap2/omap4-common.c @@ -51,16 +51,73 @@ static void __iomem *twd_base; #define IRQ_LOCALTIMER 29 -#ifdef CONFIG_OMAP4_ERRATA_I688 +#ifdef CONFIG_OMAP_INTERCONNECT_BARRIER + /* Used to implement memory barrier on DRAM path */ #define OMAP4_DRAM_BARRIER_VA 0xfe600000 -void __iomem *dram_sync, *sram_sync; +static void __iomem *dram_sync, *sram_sync; +static phys_addr_t dram_sync_paddr; +static u32 dram_sync_size; + +/* + * The OMAP4 bus structure contains asynchrnous bridges which can buffer + * data writes from the MPU. These asynchronous bridges can be found on + * paths between the MPU to EMIF, and the MPU to L3 interconnects. + * + * We need to be careful about re-ordering which can happen as a result + * of different accesses being performed via different paths, and + * therefore different asynchronous bridges. + */ -static phys_addr_t paddr; -static u32 size; +/* + * OMAP4 interconnect barrier which is called for each mb() and wmb(). + * This is to ensure that normal paths to DRAM (normal memory, cacheable + * accesses) are properly synchronised with writes to DMA coherent memory + * (normal memory, uncacheable) and device writes. + * + * The mb() and wmb() barriers only operate only on the MPU->MA->EMIF + * path, as we need to ensure that data is visible to other system + * masters prior to writes to those system masters being seen. + * + * Note: the SRAM path is not synchronised via mb() and wmb(). + */ +static void omap4_mb(void) +{ + if (dram_sync) + writel_relaxed(0, dram_sync); +} -void omap_bus_sync(void) +/* + * OMAP4 Errata i688 - asynchronous bridge corruption when entering WFI. + * + * If a data is stalled inside asynchronous bridge because of back + * pressure, it may be accepted multiple times, creating pointer + * misalignment that will corrupt next transfers on that data path until + * next reset of the system. No recovery procedure once the issue is hit, + * the path remains consistently broken. + * + * Async bridges can be found on paths between MPU to EMIF and MPU to L3 + * interconnects. + * + * This situation can happen only when the idle is initiated by a Master + * Request Disconnection (which is trigged by software when executing WFI + * on the CPU). + * + * The work-around for this errata needs all the initiators connected + * through an async bridge to ensure that data path is properly drained + * before issuing WFI. This condition will be met if one Strongly ordered + * access is performed to the target right before executing the WFI. + * + * In MPU case, L3 T2ASYNC FIFO and DDR T2ASYNC FIFO needs to be drained. + * IO barrier ensure that there is no synchronisation loss on initiators + * operating on both interconnect port simultaneously. + * + * This is a stronger version of the OMAP4 memory barrier below, and + * operates on both the MPU->MA->EMIF path but also the MPU->OCP path + * as well, and is necessary prior to executing a WFI. + */ +void omap_interconnect_sync(void) { if (dram_sync && sram_sync) { writel_relaxed(readl_relaxed(dram_sync), dram_sync); @@ -68,7 +125,6 @@ void omap_bus_sync(void) isb(); } } -EXPORT_SYMBOL(omap_bus_sync); static int __init omap4_sram_init(void) { @@ -79,7 +135,7 @@ static int __init omap4_sram_init(void) if (!np) pr_warn("%s:Unable to allocate sram needed to handle errata I688\n", __func__); - sram_pool = of_get_named_gen_pool(np, "sram", 0); + sram_pool = of_gen_pool_get(np, "sram", 0); if (!sram_pool) pr_warn("%s:Unable to get sram pool needed to handle errata I688\n", __func__); @@ -91,13 +147,10 @@ static int __init omap4_sram_init(void) omap_arch_initcall(omap4_sram_init); /* Steal one page physical memory for barrier implementation */ -int __init omap_barrier_reserve_memblock(void) +void __init omap_barrier_reserve_memblock(void) { - - size = ALIGN(PAGE_SIZE, SZ_1M); - paddr = arm_memblock_steal(size, SZ_1M); - - return 0; + dram_sync_size = ALIGN(PAGE_SIZE, SZ_1M); + dram_sync_paddr = arm_memblock_steal(dram_sync_size, SZ_1M); } void __init omap_barriers_init(void) @@ -105,19 +158,18 @@ void __init omap_barriers_init(void) struct map_desc dram_io_desc[1]; dram_io_desc[0].virtual = OMAP4_DRAM_BARRIER_VA; - dram_io_desc[0].pfn = __phys_to_pfn(paddr); - dram_io_desc[0].length = size; + dram_io_desc[0].pfn = __phys_to_pfn(dram_sync_paddr); + dram_io_desc[0].length = dram_sync_size; dram_io_desc[0].type = MT_MEMORY_RW_SO; iotable_init(dram_io_desc, ARRAY_SIZE(dram_io_desc)); dram_sync = (void __iomem *) dram_io_desc[0].virtual; - pr_info("OMAP4: Map 0x%08llx to 0x%08lx for dram barrier\n", - (long long) paddr, dram_io_desc[0].virtual); + pr_info("OMAP4: Map %pa to %p for dram barrier\n", + &dram_sync_paddr, dram_sync); + soc_mb = omap4_mb; } -#else -void __init omap_barriers_init(void) -{} + #endif void gic_dist_disable(void) diff --git a/arch/arm/mach-omap2/sleep44xx.S b/arch/arm/mach-omap2/sleep44xx.S index b84a0122d823..9b09d85d811a 100644 --- a/arch/arm/mach-omap2/sleep44xx.S +++ b/arch/arm/mach-omap2/sleep44xx.S @@ -333,16 +333,12 @@ ENDPROC(omap4_cpu_resume) #endif /* defined(CONFIG_SMP) && defined(CONFIG_PM) */ -#ifndef CONFIG_OMAP4_ERRATA_I688 -ENTRY(omap_bus_sync) - ret lr -ENDPROC(omap_bus_sync) -#endif - ENTRY(omap_do_wfi) stmfd sp!, {lr} +#ifdef CONFIG_OMAP_INTERCONNECT_BARRIER /* Drain interconnect write buffers. */ - bl omap_bus_sync + bl omap_interconnect_sync +#endif /* * Execute an ISB instruction to ensure that all of the -- cgit From 3473f26592c1c365d376aee29433d7db75f14d1e Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Fri, 17 Jul 2015 21:40:28 +0100 Subject: ARM: 8405/1: VDSO: fix regression with toolchains lacking ld.bfd executable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Sourcery CodeBench Lite 2014.05 toolchain (gcc 4.8.3, binutils 2.24.51) has a GCC which implements -fuse-ld, and it doesn't include the gold linker, but it lacks an ld.bfd executable in its installation. This means that passing -fuse-ld=bfd fails with: VDSO arch/arm/vdso/vdso.so.raw collect2: fatal error: cannot find 'ld' Arguably this is a deficiency in the toolchain, but I suspect it's commonly used enough that it's worth accommodating: just use cc-ldoption (to cause a link attempt) instead of cc-option to test whether we can use -fuse-ld. So -fuse-ld=bfd won't be used with this toolchain, but the build will rightly succeed, just as it does for toolchains which don't implement -fuse-ld (and don't use gold as the default linker). Note: this will change the failure mode for a corner case I was trying to handle in d2b30cd4b722, where the toolchain defaults to the gold linker and the BFD linker is not found in PATH, from: VDSO arch/arm/vdso/vdso.so.raw collect2: fatal error: cannot find 'ld' i.e. the BFD linker is not found, to: OBJCOPY arch/arm/vdso/vdso.so BFD: arch/arm/vdso/vdso.so: Not enough room for program headers, try linking with -N that is, we fail to prevent gold from being used as the linker, and it produces an object that objcopy can't digest. Reported-by: Baruch Siach Tested-by: Baruch Siach Tested-by: Raphaël Poggi Fixes: d2b30cd4b722 ("ARM: 8384/1: VDSO: force use of BFD linker") Cc: stable@vger.kernel.org Signed-off-by: Nathan Lynch Signed-off-by: Russell King --- arch/arm/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 9d259d94e429..1160434eece0 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -14,7 +14,7 @@ VDSO_LDFLAGS += -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 VDSO_LDFLAGS += -nostdlib -shared VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) VDSO_LDFLAGS += $(call cc-ldoption, -Wl$(comma)--build-id) -VDSO_LDFLAGS += $(call cc-option, -fuse-ld=bfd) +VDSO_LDFLAGS += $(call cc-ldoption, -fuse-ld=bfd) obj-$(CONFIG_VDSO) += vdso.o extra-$(CONFIG_VDSO) += vdso.lds -- cgit From 787047eea24a2443c366679ae6b5a3873a33b64e Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 29 Jul 2015 00:34:48 +0100 Subject: ARM: 8392/3: smp: Only expose /sys/.../cpuX/online if hotpluggable Writes to /sys/.../cpuX/online fail if we determine the platform doesn't support hotplug for that CPU. Furthermore, if the cpu_die op isn't specified the system hangs when we try to offline a CPU and it comes right back online unexpectedly. Let's figure this stuff out before we make the sysfs nodes so that the online file doesn't even exist if it isn't (at least sometimes) possible to hotplug the CPU. Add a new 'cpu_can_disable' op and repoint all 'cpu_disable' implementations at it because all implementers use the op to indicate if a CPU can be hotplugged or not in a static fashion. With PSCI we may need to add a 'cpu_disable' op so that the secure OS can be migrated off the CPU we're trying to hotplug. In this case, the 'cpu_can_disable' op will indicate that all CPUs are hotpluggable by returning true, but the 'cpu_disable' op will make a PSCI migration call and occasionally fail, denying the hotplug of a CPU. This shouldn't be any worse than x86 where we may indicate that all CPUs are hotpluggable but occasionally we can't offline a CPU due to check_irq_vectors_for_cpu_disable() failing to find a CPU to move vectors to. Cc: Mark Rutland Cc: Nicolas Pitre Cc: Dave Martin Acked-by: Simon Horman [shmobile portion] Tested-by: Simon Horman Cc: Magnus Damm Cc: Tested-by: Tyler Baker Cc: Geert Uytterhoeven Signed-off-by: Stephen Boyd Signed-off-by: Russell King --- arch/arm/common/mcpm_platsmp.c | 12 ++++-------- arch/arm/include/asm/smp.h | 1 + arch/arm/include/asm/smp_plat.h | 9 +++++++++ arch/arm/kernel/setup.c | 2 +- arch/arm/kernel/smp.c | 15 ++++++++++++++- arch/arm/mach-shmobile/common.h | 2 +- arch/arm/mach-shmobile/platsmp.c | 4 ++-- arch/arm/mach-shmobile/smp-r8a7790.c | 2 +- arch/arm/mach-shmobile/smp-r8a7791.c | 2 +- arch/arm/mach-shmobile/smp-sh73a0.c | 2 +- 10 files changed, 35 insertions(+), 16 deletions(-) diff --git a/arch/arm/common/mcpm_platsmp.c b/arch/arm/common/mcpm_platsmp.c index 92e54d7c6f46..2b25b6038f66 100644 --- a/arch/arm/common/mcpm_platsmp.c +++ b/arch/arm/common/mcpm_platsmp.c @@ -65,14 +65,10 @@ static int mcpm_cpu_kill(unsigned int cpu) return !mcpm_wait_for_cpu_powerdown(pcpu, pcluster); } -static int mcpm_cpu_disable(unsigned int cpu) +static bool mcpm_cpu_can_disable(unsigned int cpu) { - /* - * We assume all CPUs may be shut down. - * This would be the hook to use for eventual Secure - * OS migration requests as described in the PSCI spec. - */ - return 0; + /* We assume all CPUs may be shut down. */ + return true; } static void mcpm_cpu_die(unsigned int cpu) @@ -92,7 +88,7 @@ static struct smp_operations __initdata mcpm_smp_ops = { .smp_secondary_init = mcpm_secondary_init, #ifdef CONFIG_HOTPLUG_CPU .cpu_kill = mcpm_cpu_kill, - .cpu_disable = mcpm_cpu_disable, + .cpu_can_disable = mcpm_cpu_can_disable, .cpu_die = mcpm_cpu_die, #endif }; diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index 2f3ac1ba6fb4..318ce89eeff7 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -105,6 +105,7 @@ struct smp_operations { #ifdef CONFIG_HOTPLUG_CPU int (*cpu_kill)(unsigned int cpu); void (*cpu_die)(unsigned int cpu); + bool (*cpu_can_disable)(unsigned int cpu); int (*cpu_disable)(unsigned int cpu); #endif #endif diff --git a/arch/arm/include/asm/smp_plat.h b/arch/arm/include/asm/smp_plat.h index 993e5224d8f7..f9080717fc88 100644 --- a/arch/arm/include/asm/smp_plat.h +++ b/arch/arm/include/asm/smp_plat.h @@ -107,4 +107,13 @@ static inline u32 mpidr_hash_size(void) extern int platform_can_secondary_boot(void); extern int platform_can_cpu_hotplug(void); +#ifdef CONFIG_HOTPLUG_CPU +extern int platform_can_hotplug_cpu(unsigned int cpu); +#else +static inline int platform_can_hotplug_cpu(unsigned int cpu) +{ + return 0; +} +#endif + #endif diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 36c18b73c1f4..6bbec6042052 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -1015,7 +1015,7 @@ static int __init topology_init(void) for_each_possible_cpu(cpu) { struct cpuinfo_arm *cpuinfo = &per_cpu(cpu_data, cpu); - cpuinfo->cpu.hotpluggable = 1; + cpuinfo->cpu.hotpluggable = platform_can_hotplug_cpu(cpu); register_cpu(&cpuinfo->cpu, cpu); } diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 90dfbedfbfb8..3cd846f48eaf 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -175,13 +175,26 @@ static int platform_cpu_disable(unsigned int cpu) if (smp_ops.cpu_disable) return smp_ops.cpu_disable(cpu); + return 0; +} + +int platform_can_hotplug_cpu(unsigned int cpu) +{ + /* cpu_die must be specified to support hotplug */ + if (!smp_ops.cpu_die) + return 0; + + if (smp_ops.cpu_can_disable) + return smp_ops.cpu_can_disable(cpu); + /* * By default, allow disabling all CPUs except the first one, * since this is special on a lot of platforms, e.g. because * of clock tick interrupts. */ - return cpu == 0 ? -EPERM : 0; + return cpu != 0; } + /* * __cpu_disable runs on the processor to be shutdown. */ diff --git a/arch/arm/mach-shmobile/common.h b/arch/arm/mach-shmobile/common.h index 476092b86c6e..8d27ec546a35 100644 --- a/arch/arm/mach-shmobile/common.h +++ b/arch/arm/mach-shmobile/common.h @@ -13,7 +13,7 @@ extern void shmobile_smp_boot(void); extern void shmobile_smp_sleep(void); extern void shmobile_smp_hook(unsigned int cpu, unsigned long fn, unsigned long arg); -extern int shmobile_smp_cpu_disable(unsigned int cpu); +extern bool shmobile_smp_cpu_can_disable(unsigned int cpu); extern void shmobile_boot_scu(void); extern void shmobile_smp_scu_prepare_cpus(unsigned int max_cpus); extern void shmobile_smp_scu_cpu_die(unsigned int cpu); diff --git a/arch/arm/mach-shmobile/platsmp.c b/arch/arm/mach-shmobile/platsmp.c index 3923e09e966d..b23378f3d7e1 100644 --- a/arch/arm/mach-shmobile/platsmp.c +++ b/arch/arm/mach-shmobile/platsmp.c @@ -31,8 +31,8 @@ void shmobile_smp_hook(unsigned int cpu, unsigned long fn, unsigned long arg) } #ifdef CONFIG_HOTPLUG_CPU -int shmobile_smp_cpu_disable(unsigned int cpu) +bool shmobile_smp_cpu_can_disable(unsigned int cpu) { - return 0; /* Hotplug of any CPU is supported */ + return true; /* Hotplug of any CPU is supported */ } #endif diff --git a/arch/arm/mach-shmobile/smp-r8a7790.c b/arch/arm/mach-shmobile/smp-r8a7790.c index 930f45cbc08a..947e437cab68 100644 --- a/arch/arm/mach-shmobile/smp-r8a7790.c +++ b/arch/arm/mach-shmobile/smp-r8a7790.c @@ -64,7 +64,7 @@ struct smp_operations r8a7790_smp_ops __initdata = { .smp_prepare_cpus = r8a7790_smp_prepare_cpus, .smp_boot_secondary = shmobile_smp_apmu_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU - .cpu_disable = shmobile_smp_cpu_disable, + .cpu_can_disable = shmobile_smp_cpu_can_disable, .cpu_die = shmobile_smp_apmu_cpu_die, .cpu_kill = shmobile_smp_apmu_cpu_kill, #endif diff --git a/arch/arm/mach-shmobile/smp-r8a7791.c b/arch/arm/mach-shmobile/smp-r8a7791.c index 5e2d1db79afa..b2508c0d276b 100644 --- a/arch/arm/mach-shmobile/smp-r8a7791.c +++ b/arch/arm/mach-shmobile/smp-r8a7791.c @@ -58,7 +58,7 @@ struct smp_operations r8a7791_smp_ops __initdata = { .smp_prepare_cpus = r8a7791_smp_prepare_cpus, .smp_boot_secondary = r8a7791_smp_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU - .cpu_disable = shmobile_smp_cpu_disable, + .cpu_can_disable = shmobile_smp_cpu_can_disable, .cpu_die = shmobile_smp_apmu_cpu_die, .cpu_kill = shmobile_smp_apmu_cpu_kill, #endif diff --git a/arch/arm/mach-shmobile/smp-sh73a0.c b/arch/arm/mach-shmobile/smp-sh73a0.c index 2106d6b76a06..ae7c764fd6b4 100644 --- a/arch/arm/mach-shmobile/smp-sh73a0.c +++ b/arch/arm/mach-shmobile/smp-sh73a0.c @@ -68,7 +68,7 @@ struct smp_operations sh73a0_smp_ops __initdata = { .smp_prepare_cpus = sh73a0_smp_prepare_cpus, .smp_boot_secondary = sh73a0_boot_secondary, #ifdef CONFIG_HOTPLUG_CPU - .cpu_disable = shmobile_smp_cpu_disable, + .cpu_can_disable = shmobile_smp_cpu_can_disable, .cpu_die = shmobile_smp_scu_cpu_die, .cpu_kill = shmobile_smp_scu_cpu_kill, #endif -- cgit From 9ac87c5a0b19417f925dc61cac7198d872159a2c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 31 Jul 2015 11:28:54 +0100 Subject: ARM: 8407/1: switch_to: Remove finish_arch_switch Fold finish_arch_switch() into switch_to(), in preparation for the removal of the finish_arch_switch call from core sched code. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Russell King --- arch/arm/include/asm/switch_to.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h index c99e259469f7..12ebfcc1d539 100644 --- a/arch/arm/include/asm/switch_to.h +++ b/arch/arm/include/asm/switch_to.h @@ -10,7 +10,9 @@ * CPU. */ #if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) && defined(CONFIG_CPU_V7) -#define finish_arch_switch(prev) dsb(ish) +#define __complete_pending_tlbi() dsb(ish) +#else +#define __complete_pending_tlbi() #endif /* @@ -22,6 +24,7 @@ extern struct task_struct *__switch_to(struct task_struct *, struct thread_info #define switch_to(prev,next,last) \ do { \ + __complete_pending_tlbi(); \ last = __switch_to(prev,task_thread_info(prev), task_thread_info(next)); \ } while (0) -- cgit From 1234e3fda9aa24b2d650bbcd9ef09d5f6a12dc86 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 24 Jul 2015 09:10:55 +0100 Subject: ARM: reduce visibility of dmac_* functions The dmac_* functions are private to the ARM DMA API implementation, and should not be used by drivers. In order to discourage their use, remove their prototypes and macros from asm/*.h. We have to leave dmac_flush_range() behind as Exynos and MSM IOMMU code use these; once these sites are fixed, this can be moved also. Signed-off-by: Russell King --- arch/arm/include/asm/cacheflush.h | 4 ---- arch/arm/include/asm/glue-cache.h | 2 -- arch/arm/mm/dma-mapping.c | 1 + arch/arm/mm/dma.h | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 33 insertions(+), 6 deletions(-) create mode 100644 arch/arm/mm/dma.h diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index 4812cda8fd17..c5230a44eeca 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -140,8 +140,6 @@ extern struct cpu_cache_fns cpu_cache; * is visible to DMA, or data written by DMA to system memory is * visible to the CPU. */ -#define dmac_map_area cpu_cache.dma_map_area -#define dmac_unmap_area cpu_cache.dma_unmap_area #define dmac_flush_range cpu_cache.dma_flush_range #else @@ -161,8 +159,6 @@ extern void __cpuc_flush_dcache_area(void *, size_t); * is visible to DMA, or data written by DMA to system memory is * visible to the CPU. */ -extern void dmac_map_area(const void *, size_t, int); -extern void dmac_unmap_area(const void *, size_t, int); extern void dmac_flush_range(const void *, const void *); #endif diff --git a/arch/arm/include/asm/glue-cache.h b/arch/arm/include/asm/glue-cache.h index a3c24cd5b7c8..cab07f69382d 100644 --- a/arch/arm/include/asm/glue-cache.h +++ b/arch/arm/include/asm/glue-cache.h @@ -158,8 +158,6 @@ static inline void nop_dma_unmap_area(const void *s, size_t l, int f) { } #define __cpuc_coherent_user_range __glue(_CACHE,_coherent_user_range) #define __cpuc_flush_dcache_area __glue(_CACHE,_flush_kern_dcache_area) -#define dmac_map_area __glue(_CACHE,_dma_map_area) -#define dmac_unmap_area __glue(_CACHE,_dma_unmap_area) #define dmac_flush_range __glue(_CACHE,_dma_flush_range) #endif diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 1ced8a0f7a52..5edf17cf043d 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -39,6 +39,7 @@ #include #include +#include "dma.h" #include "mm.h" /* diff --git a/arch/arm/mm/dma.h b/arch/arm/mm/dma.h new file mode 100644 index 000000000000..70ea6852f94e --- /dev/null +++ b/arch/arm/mm/dma.h @@ -0,0 +1,32 @@ +#ifndef DMA_H +#define DMA_H + +#include + +#ifndef MULTI_CACHE +#define dmac_map_area __glue(_CACHE,_dma_map_area) +#define dmac_unmap_area __glue(_CACHE,_dma_unmap_area) + +/* + * These are private to the dma-mapping API. Do not use directly. + * Their sole purpose is to ensure that data held in the cache + * is visible to DMA, or data written by DMA to system memory is + * visible to the CPU. + */ +extern void dmac_map_area(const void *, size_t, int); +extern void dmac_unmap_area(const void *, size_t, int); + +#else + +/* + * These are private to the dma-mapping API. Do not use directly. + * Their sole purpose is to ensure that data held in the cache + * is visible to DMA, or data written by DMA to system memory is + * visible to the CPU. + */ +#define dmac_map_area cpu_cache.dma_map_area +#define dmac_unmap_area cpu_cache.dma_unmap_area + +#endif + +#endif -- cgit From 21caf3a765b0a88f8fedf63b36e5d15683b73fe5 Mon Sep 17 00:00:00 2001 From: Lorenzo Nava Date: Thu, 2 Jul 2015 17:28:03 +0100 Subject: ARM: 8398/1: arm DMA: Fix allocation from CMA for coherent DMA This patch allows the use of CMA for DMA coherent memory allocation. At the moment if the input parameter "is_coherent" is set to true the allocation is not made using the CMA, which I think is not the desired behaviour. The patch covers the allocation and free of memory for coherent DMA. Signed-off-by: Lorenzo Nava Reviewed-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/mm/dma-mapping.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 5edf17cf043d..ad7419e69967 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -649,14 +649,18 @@ static void *__dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, size = PAGE_ALIGN(size); want_vaddr = !dma_get_attr(DMA_ATTR_NO_KERNEL_MAPPING, attrs); - if (is_coherent || nommu()) + if (nommu()) + addr = __alloc_simple_buffer(dev, size, gfp, &page); + else if (dev_get_cma_area(dev) && (gfp & __GFP_WAIT)) + addr = __alloc_from_contiguous(dev, size, prot, &page, + caller, want_vaddr); + else if (is_coherent) addr = __alloc_simple_buffer(dev, size, gfp, &page); else if (!(gfp & __GFP_WAIT)) addr = __alloc_from_pool(size, &page); - else if (!dev_get_cma_area(dev)) - addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, caller, want_vaddr); else - addr = __alloc_from_contiguous(dev, size, prot, &page, caller, want_vaddr); + addr = __alloc_remap_buffer(dev, size, gfp, prot, &page, + caller, want_vaddr); if (page) *handle = pfn_to_dma(dev, page_to_pfn(page)); @@ -684,13 +688,12 @@ void *arm_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, static void *arm_coherent_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, struct dma_attrs *attrs) { - pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL); void *memory; if (dma_alloc_from_coherent(dev, size, handle, &memory)) return memory; - return __dma_alloc(dev, size, handle, gfp, prot, true, + return __dma_alloc(dev, size, handle, gfp, PAGE_KERNEL, true, attrs, __builtin_return_address(0)); } @@ -754,12 +757,12 @@ static void __arm_dma_free(struct device *dev, size_t size, void *cpu_addr, size = PAGE_ALIGN(size); - if (is_coherent || nommu()) { + if (nommu()) { __dma_free_buffer(page, size); - } else if (__free_from_pool(cpu_addr, size)) { + } else if (!is_coherent && __free_from_pool(cpu_addr, size)) { return; } else if (!dev_get_cma_area(dev)) { - if (want_vaddr) + if (want_vaddr && !is_coherent) __dma_free_remap(cpu_addr, size); __dma_free_buffer(page, size); } else { -- cgit From 998ef5d81c74c752d74c7925bc370909b84adb9d Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Thu, 6 Aug 2015 15:07:04 +0100 Subject: ARM: 8408/1: Fix the secondary_startup function in Big Endian case Since the commit "b2c3e38a5471 ARM: redo TTBR setup code for LPAE", the setup code had been reworked. As a result the secondary CPUs failed to come online in Big Endian. As explained by Russell, the new code expected the value in r4/r5 to be the least significant 32bits in r4 and the most significant 32bits in r5. However, in the secondary code, we load this using ldrd, which on BE reverses that. This patch swap r4/r5 after the ldrd. It is done using the xor instructions in order to not use a temporary register. Signed-off-by: Gregory CLEMENT Signed-off-by: Russell King --- arch/arm/kernel/head.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index bd755d97e459..29e2991465cb 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -399,6 +399,9 @@ ENTRY(secondary_startup) sub lr, r4, r5 @ mmu has been enabled add r3, r7, lr ldrd r4, [r3, #0] @ get secondary_data.pgdir +ARM_BE8(eor r4, r4, r5) @ Swap r5 and r4 in BE: +ARM_BE8(eor r5, r4, r5) @ it can be done in 3 steps +ARM_BE8(eor r4, r4, r5) @ without using a temp reg. ldr r8, [r3, #8] @ get secondary_data.swapper_pg_dir badr lr, __enable_mmu @ return address mov r13, r12 @ __secondary_switched address -- cgit From e83dd3770021910293edea6fb2dc2fa306b1bf34 Mon Sep 17 00:00:00 2001 From: Drew Richardson Date: Thu, 6 Aug 2015 18:50:27 +0100 Subject: ARM: 8409/1: Mark ret_fast_syscall as a function ret_fast_syscall runs when user space makes a syscall. However it needs to be marked as such so the ELF information is correct. Before it was: 101: 8000f300 0 NOTYPE LOCAL DEFAULT 2 ret_fast_syscall But with this change it correctly shows as: 101: 8000f300 96 FUNC LOCAL DEFAULT 2 ret_fast_syscall I see this function when using perf to unwind call stacks from kernel space to user space. Without this change I would need to add some special case logic when using the vmlinux ELF information. Signed-off-by: Drew Richardson Acked-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/entry-common.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 92828a1dec80..b48dd4f37f80 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -61,6 +61,7 @@ work_pending: movlt scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE) ldmia sp, {r0 - r6} @ have to reload r0 - r6 b local_restart @ ... and off we go +ENDPROC(ret_fast_syscall) /* * "slow" syscall return path. "why" tells us if this was a real syscall. -- cgit From 09edea4f8fdeb4e292b80d493296070f5ec64e6e Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 10 Aug 2015 17:36:06 +0100 Subject: ARM: 8410/1: VDSO: fix coarse clock monotonicity regression Since 906c55579a63 ("timekeeping: Copy the shadow-timekeeper over the real timekeeper last") it has become possible on ARM to: - Obtain a CLOCK_MONOTONIC_COARSE or CLOCK_REALTIME_COARSE timestamp via syscall. - Subsequently obtain a timestamp for the same clock ID via VDSO which predates the first timestamp (by one jiffy). This is because ARM's update_vsyscall is deriving the coarse time using the __current_kernel_time interface, when it should really be using the timekeeper object provided to it by the timekeeping core. It happened to work before only because __current_kernel_time would access the same timekeeper object which had been passed to update_vsyscall. This is no longer the case. Cc: stable@vger.kernel.org Fixes: 906c55579a63 ("timekeeping: Copy the shadow-timekeeper over the real timekeeper last") Signed-off-by: Nathan Lynch Acked-by: Will Deacon Signed-off-by: Russell King --- arch/arm/kernel/vdso.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index efe17dd9b921..54a5aeab988d 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -296,7 +296,6 @@ static bool tk_is_cntvct(const struct timekeeper *tk) */ void update_vsyscall(struct timekeeper *tk) { - struct timespec xtime_coarse; struct timespec64 *wtm = &tk->wall_to_monotonic; if (!cntvct_ok) { @@ -308,10 +307,10 @@ void update_vsyscall(struct timekeeper *tk) vdso_write_begin(vdso_data); - xtime_coarse = __current_kernel_time(); vdso_data->tk_is_cntvct = tk_is_cntvct(tk); - vdso_data->xtime_coarse_sec = xtime_coarse.tv_sec; - vdso_data->xtime_coarse_nsec = xtime_coarse.tv_nsec; + vdso_data->xtime_coarse_sec = tk->xtime_sec; + vdso_data->xtime_coarse_nsec = (u32)(tk->tkr_mono.xtime_nsec >> + tk->tkr_mono.shift); vdso_data->wtm_clock_sec = wtm->tv_sec; vdso_data->wtm_clock_nsec = wtm->tv_nsec; -- cgit From efaa6e266ba70439da00e7f1c8a218e243ae140a Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 24 Jul 2015 10:21:02 +0100 Subject: firmware: qcom_scm-32: replace open-coded call to __cpuc_flush_dcache_area() Rathe rthan directly accessing architecture internal functions, provide an "method"-centric wrapper for qcom_scm-32 to do what's necessary to ensure that the secure monitor can see the data. This is called "secure_flush_area" and ensures that the specified memory area is coherent across the secure boundary. Acked-by: Andy Gross Reviewed-by: Stephen Boyd Signed-off-by: Russell King --- arch/arm/include/asm/cacheflush.h | 17 +++++++++++++++++ drivers/firmware/qcom_scm-32.c | 4 +--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h index c5230a44eeca..d5525bfc7e3e 100644 --- a/arch/arm/include/asm/cacheflush.h +++ b/arch/arm/include/asm/cacheflush.h @@ -502,4 +502,21 @@ static inline void set_kernel_text_ro(void) { } void flush_uprobe_xol_access(struct page *page, unsigned long uaddr, void *kaddr, unsigned long len); +/** + * secure_flush_area - ensure coherency across the secure boundary + * @addr: virtual address + * @size: size of region + * + * Ensure that the specified area of memory is coherent across the secure + * boundary from the non-secure side. This is used when calling secure + * firmware where the secure firmware does not ensure coherency. + */ +static inline void secure_flush_area(const void *addr, size_t size) +{ + phys_addr_t phys = __pa(addr); + + __cpuc_flush_dcache_area((void *)addr, size); + outer_flush_range(phys, phys + size); +} + #endif diff --git a/drivers/firmware/qcom_scm-32.c b/drivers/firmware/qcom_scm-32.c index 1bd6f9c34331..29e6850665eb 100644 --- a/drivers/firmware/qcom_scm-32.c +++ b/drivers/firmware/qcom_scm-32.c @@ -24,7 +24,6 @@ #include #include -#include #include #include "qcom_scm.h" @@ -219,8 +218,7 @@ static int __qcom_scm_call(const struct qcom_scm_command *cmd) * Flush the command buffer so that the secure world sees * the correct data. */ - __cpuc_flush_dcache_area((void *)cmd, cmd->len); - outer_flush_range(cmd_addr, cmd_addr + cmd->len); + secure_flush_area(cmd, cmd->len); ret = smc(cmd_addr); if (ret < 0) -- cgit From 0f64b247e63ac8c214efa6725366b42563ab138c Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 12 Aug 2015 16:45:02 +0100 Subject: ARM: 8414/1: __copy_to_user_memcpy: fix mmap semaphore usage The mmap semaphore should not be taken when page faults are disabled. Since pagefault_disable() no longer disables preemption, we now need to use faulthandler_disabled() in place of in_atomic(). Signed-off-by: Nicolas Pitre Tested-by: Mark Salter Signed-off-by: Russell King --- arch/arm/lib/uaccess_with_memcpy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c index 3e58d710013c..4b39af2dfda9 100644 --- a/arch/arm/lib/uaccess_with_memcpy.c +++ b/arch/arm/lib/uaccess_with_memcpy.c @@ -96,7 +96,7 @@ __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n) } /* the mmap semaphore is taken only if not in an atomic context */ - atomic = in_atomic(); + atomic = faulthandler_disabled(); if (!atomic) down_read(¤t->mm->mmap_sem); -- cgit From 3939f3345050b1ace675675c47d99fd7b606d9e3 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 17 Aug 2015 04:03:33 +0100 Subject: ARM: 8418/1: add boot image dependencies to not generate invalid images U-Boot is often used to boot the kernel on ARM boards, but uImage is not built by "make all", so we are often inclined to do "make all uImage" to generate DTBs, modules and uImage in a single command, but we should notice a pitfall behind it. In fact, "make all uImage" could generate an invalid uImage if it is run with the parallel option (-j). You can reproduce this problem with the following procedure: [1] First, build "all" and "uImage" separately. You will get a valid uImage $ git clean -f -x -d $ export CROSS_COMPILE= $ make -s -j8 ARCH=arm multi_v7_defconfig $ make -s -j8 ARCH=arm all $ make -j8 ARCH=arm UIMAGE_LOADADDR=0x80208000 uImage CHK include/config/kernel.release CHK include/generated/uapi/linux/version.h CHK include/generated/utsrelease.h make[1]: `include/generated/mach-types.h' is up to date. CHK include/generated/timeconst.h CHK include/generated/bounds.h CHK include/generated/asm-offsets.h CALL scripts/checksyscalls.sh CHK include/generated/compile.h Kernel: arch/arm/boot/Image is ready Kernel: arch/arm/boot/zImage is ready UIMAGE arch/arm/boot/uImage Image Name: Linux-4.2.0-rc5-00156-gdd2384a-d Created: Sat Aug 8 23:21:35 2015 Image Type: ARM Linux Kernel Image (uncompressed) Data Size: 6138648 Bytes = 5994.77 kB = 5.85 MB Load Address: 80208000 Entry Point: 80208000 Image arch/arm/boot/uImage is ready $ ls -l arch/arm/boot/*Image -rwxrwxr-x 1 masahiro masahiro 13766656 Aug 8 23:20 arch/arm/boot/Image -rw-rw-r-- 1 masahiro masahiro 6138712 Aug 8 23:21 arch/arm/boot/uImage -rwxrwxr-x 1 masahiro masahiro 6138648 Aug 8 23:20 arch/arm/boot/zImage [2] Update some source file(s) $ touch init/main.c [3] Then, re-build "all" and "uImage" simultaneously. You will get an invalid uImage at random. $ make -j8 ARCH=arm UIMAGE_LOADADDR=0x80208000 all uImage CHK include/config/kernel.release CHK include/generated/uapi/linux/version.h CHK include/generated/utsrelease.h make[1]: `include/generated/mach-types.h' is up to date. CHK include/generated/timeconst.h CHK include/generated/bounds.h CHK include/generated/asm-offsets.h CALL scripts/checksyscalls.sh CC init/main.o CHK include/generated/compile.h LD init/built-in.o LINK vmlinux LD vmlinux.o MODPOST vmlinux.o GEN .version CHK include/generated/compile.h UPD include/generated/compile.h CC init/version.o LD init/built-in.o KSYM .tmp_kallsyms1.o KSYM .tmp_kallsyms2.o LD vmlinux SORTEX vmlinux SYSMAP System.map OBJCOPY arch/arm/boot/Image Building modules, stage 2. Kernel: arch/arm/boot/Image is ready GZIP arch/arm/boot/compressed/piggy.gzip AS arch/arm/boot/compressed/piggy.gzip.o Kernel: arch/arm/boot/Image is ready LD arch/arm/boot/compressed/vmlinux GZIP arch/arm/boot/compressed/piggy.gzip OBJCOPY arch/arm/boot/zImage Kernel: arch/arm/boot/zImage is ready UIMAGE arch/arm/boot/uImage Image Name: Linux-4.2.0-rc5-00156-gdd2384a-d Created: Sat Aug 8 23:23:14 2015 Image Type: ARM Linux Kernel Image (uncompressed) Data Size: 26472 Bytes = 25.85 kB = 0.03 MB Load Address: 80208000 Entry Point: 80208000 Image arch/arm/boot/uImage is ready MODPOST 192 modules AS arch/arm/boot/compressed/piggy.gzip.o LD arch/arm/boot/compressed/vmlinux OBJCOPY arch/arm/boot/zImage Kernel: arch/arm/boot/zImage is ready $ ls -l arch/arm/boot/*Image -rwxrwxr-x 1 masahiro masahiro 13766656 Aug 8 23:23 arch/arm/boot/Image -rw-rw-r-- 1 masahiro masahiro 26536 Aug 8 23:23 arch/arm/boot/uImage -rwxrwxr-x 1 masahiro masahiro 6138648 Aug 8 23:23 arch/arm/boot/zImage Please notice the uImage is extremely small when this issue is encountered. Besides, "Kernel: arch/arm/boot/zImage is ready" is displayed twice, before and after the uImage log. The root cause of this is the race condition between zImage and uImage. Actually, uImage depends on zImage, but the dependency between the two is only described in arch/arm/boot/Makefile. Because arch/arm/boot/Makefile is not included from the top-level Makefile, it cannot know the dependency between zImage and uImage. Consequently, when we run make with the parallel option, Kbuild updates vmlinux first, and then two different threads descends into the arch/arm/boot/Makefile almost at the same time, one for updating zImage and the other for uImage. While one thread is re-generating zImage, the other also tries to update zImage before creating uImage on top of that. zImage is overwritten by the slower thread and then uImage is created based on the half-written zImage. This is the reason why "Kernel: arch/arm/boot/zImage is ready" is displayed twice, and a broken uImage is created. The same problem could happen on bootpImage. This commit adds dependencies among Image, zImage, uImage, and bootpImage to arch/arm/Makefile, which is included from the top-level Makefile. Signed-off-by: Masahiro Yamada Signed-off-by: Russell King --- arch/arm/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm/Makefile b/arch/arm/Makefile index 07ab3d203916..7451b447cc2d 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -312,6 +312,9 @@ INSTALL_TARGETS = zinstall uinstall install PHONY += bzImage $(BOOT_TARGETS) $(INSTALL_TARGETS) +bootpImage uImage: zImage +zImage: Image + $(BOOT_TARGETS): vmlinux $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $(boot)/$@ -- cgit From a5f4c561b3b19a9bc43a81da6382b0098ebbc1fb Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Thu, 13 Aug 2015 00:01:52 +0100 Subject: ARM: 8415/1: early fixmap support for earlycon Add early fixmap support, initially to support permanent, fixed mapping support for early console. A temporary, early pte is created which is migrated to a permanent mapping in paging_init. This is also needed since the attributes may change as the memory types are initialized. The 3MiB range of fixmap spans two pte tables, but currently only one pte is created for early fixmap support. Re-add FIX_KMAP_BEGIN to the index calculation in highmem.c since the index for kmap does not start at zero anymore. This reverts 4221e2e6b316 ("ARM: 8031/1: fixmap: remove FIX_KMAP_BEGIN and FIX_KMAP_END") to some extent. Cc: Mark Salter Cc: Kees Cook Cc: Laura Abbott Cc: Arnd Bergmann Cc: Ard Biesheuvel Signed-off-by: Rob Herring Signed-off-by: Stefan Agner Signed-off-by: Russell King --- arch/arm/Kconfig | 3 ++ arch/arm/include/asm/fixmap.h | 15 +++++++- arch/arm/kernel/setup.c | 4 ++ arch/arm/mm/highmem.c | 6 +-- arch/arm/mm/mmu.c | 88 +++++++++++++++++++++++++++++++++++++++---- 5 files changed, 105 insertions(+), 11 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a750c1425c3a..1bcda7cb2e04 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -188,6 +188,9 @@ config ARCH_HAS_ILOG2_U64 config ARCH_HAS_BANDGAP bool +config FIX_EARLYCON_MEM + def_bool y if MMU + config GENERIC_HWEIGHT bool default y diff --git a/arch/arm/include/asm/fixmap.h b/arch/arm/include/asm/fixmap.h index 0415eae1df27..58cfe9f1a687 100644 --- a/arch/arm/include/asm/fixmap.h +++ b/arch/arm/include/asm/fixmap.h @@ -6,9 +6,13 @@ #define FIXADDR_TOP (FIXADDR_END - PAGE_SIZE) #include +#include enum fixed_addresses { - FIX_KMAP_BEGIN, + FIX_EARLYCON_MEM_BASE, + __end_of_permanent_fixed_addresses, + + FIX_KMAP_BEGIN = __end_of_permanent_fixed_addresses, FIX_KMAP_END = FIX_KMAP_BEGIN + (KM_TYPE_NR * NR_CPUS) - 1, /* Support writing RO kernel text via kprobes, jump labels, etc. */ @@ -18,7 +22,16 @@ enum fixed_addresses { __end_of_fixed_addresses }; +#define FIXMAP_PAGE_COMMON (L_PTE_YOUNG | L_PTE_PRESENT | L_PTE_XN | L_PTE_DIRTY) + +#define FIXMAP_PAGE_NORMAL (FIXMAP_PAGE_COMMON | L_PTE_MT_WRITEBACK) + +/* Used by set_fixmap_(io|nocache), both meant for mapping a device */ +#define FIXMAP_PAGE_IO (FIXMAP_PAGE_COMMON | L_PTE_MT_DEV_SHARED | L_PTE_SHARED) +#define FIXMAP_PAGE_NOCACHE FIXMAP_PAGE_IO + void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot); +void __init early_fixmap_init(void); #include diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 6bbec6042052..e2ecee6b70ca 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -954,6 +955,9 @@ void __init setup_arch(char **cmdline_p) strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = cmd_line; + if (IS_ENABLED(CONFIG_FIX_EARLYCON_MEM)) + early_fixmap_init(); + parse_early_param(); #ifdef CONFIG_MMU diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index ee8dfa793989..9df5f09585ca 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -79,7 +79,7 @@ void *kmap_atomic(struct page *page) type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR * smp_processor_id(); + idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); vaddr = __fix_to_virt(idx); #ifdef CONFIG_DEBUG_HIGHMEM /* @@ -106,7 +106,7 @@ void __kunmap_atomic(void *kvaddr) if (kvaddr >= (void *)FIXADDR_START) { type = kmap_atomic_idx(); - idx = type + KM_TYPE_NR * smp_processor_id(); + idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); if (cache_is_vivt()) __cpuc_flush_dcache_area((void *)vaddr, PAGE_SIZE); @@ -138,7 +138,7 @@ void *kmap_atomic_pfn(unsigned long pfn) return page_address(page); type = kmap_atomic_idx_push(); - idx = type + KM_TYPE_NR * smp_processor_id(); + idx = FIX_KMAP_BEGIN + type + KM_TYPE_NR * smp_processor_id(); vaddr = __fix_to_virt(idx); #ifdef CONFIG_DEBUG_HIGHMEM BUG_ON(!pte_none(get_fixmap_pte(vaddr))); diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 6ca7d9aa896f..fb9e817d08bb 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -357,6 +357,47 @@ const struct mem_type *get_mem_type(unsigned int type) } EXPORT_SYMBOL(get_mem_type); +static pte_t *(*pte_offset_fixmap)(pmd_t *dir, unsigned long addr); + +static pte_t bm_pte[PTRS_PER_PTE + PTE_HWTABLE_PTRS] + __aligned(PTE_HWTABLE_OFF + PTE_HWTABLE_SIZE) __initdata; + +static pte_t * __init pte_offset_early_fixmap(pmd_t *dir, unsigned long addr) +{ + return &bm_pte[pte_index(addr)]; +} + +static pte_t *pte_offset_late_fixmap(pmd_t *dir, unsigned long addr) +{ + return pte_offset_kernel(dir, addr); +} + +static inline pmd_t * __init fixmap_pmd(unsigned long addr) +{ + pgd_t *pgd = pgd_offset_k(addr); + pud_t *pud = pud_offset(pgd, addr); + pmd_t *pmd = pmd_offset(pud, addr); + + return pmd; +} + +void __init early_fixmap_init(void) +{ + pmd_t *pmd; + + /* + * The early fixmap range spans multiple pmds, for which + * we are not prepared: + */ + BUILD_BUG_ON((__fix_to_virt(__end_of_permanent_fixed_addresses) >> PMD_SHIFT) + != FIXADDR_TOP >> PMD_SHIFT); + + pmd = fixmap_pmd(FIXADDR_TOP); + pmd_populate_kernel(&init_mm, pmd, bm_pte); + + pte_offset_fixmap = pte_offset_early_fixmap; +} + /* * To avoid TLB flush broadcasts, this uses local_flush_tlb_kernel_range(). * As a result, this can only be called with preemption disabled, as under @@ -365,7 +406,7 @@ EXPORT_SYMBOL(get_mem_type); void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) { unsigned long vaddr = __fix_to_virt(idx); - pte_t *pte = pte_offset_kernel(pmd_off_k(vaddr), vaddr); + pte_t *pte = pte_offset_fixmap(pmd_off_k(vaddr), vaddr); /* Make sure fixmap region does not exceed available allocation. */ BUILD_BUG_ON(FIXADDR_START + (__end_of_fixed_addresses * PAGE_SIZE) > @@ -855,7 +896,7 @@ static void __init create_mapping(struct map_desc *md) } if ((md->type == MT_DEVICE || md->type == MT_ROM) && - md->virtual >= PAGE_OFFSET && + md->virtual >= PAGE_OFFSET && md->virtual < FIXADDR_START && (md->virtual < VMALLOC_START || md->virtual >= VMALLOC_END)) { pr_warn("BUG: mapping for 0x%08llx at 0x%08lx out of vmalloc space\n", (long long)__pfn_to_phys((u64)md->pfn), md->virtual); @@ -1213,10 +1254,10 @@ void __init arm_mm_memblock_reserve(void) /* * Set up the device mappings. Since we clear out the page tables for all - * mappings above VMALLOC_START, we will remove any debug device mappings. - * This means you have to be careful how you debug this function, or any - * called function. This means you can't use any function or debugging - * method which may touch any device, otherwise the kernel _will_ crash. + * mappings above VMALLOC_START, except early fixmap, we might remove debug + * device mappings. This means earlycon can be used to debug this function + * Any other function or debugging method which may touch any device _will_ + * crash the kernel. */ static void __init devicemaps_init(const struct machine_desc *mdesc) { @@ -1231,7 +1272,10 @@ static void __init devicemaps_init(const struct machine_desc *mdesc) early_trap_init(vectors); - for (addr = VMALLOC_START; addr; addr += PMD_SIZE) + /* + * Clear page table except top pmd used by early fixmaps + */ + for (addr = VMALLOC_START; addr < (FIXADDR_TOP & PMD_MASK); addr += PMD_SIZE) pmd_clear(pmd_off_k(addr)); /* @@ -1483,6 +1527,35 @@ void __init early_paging_init(const struct machine_desc *mdesc) #endif +static void __init early_fixmap_shutdown(void) +{ + int i; + unsigned long va = fix_to_virt(__end_of_permanent_fixed_addresses - 1); + + pte_offset_fixmap = pte_offset_late_fixmap; + pmd_clear(fixmap_pmd(va)); + local_flush_tlb_kernel_page(va); + + for (i = 0; i < __end_of_permanent_fixed_addresses; i++) { + pte_t *pte; + struct map_desc map; + + map.virtual = fix_to_virt(i); + pte = pte_offset_early_fixmap(pmd_off_k(map.virtual), map.virtual); + + /* Only i/o device mappings are supported ATM */ + if (pte_none(*pte) || + (pte_val(*pte) & L_PTE_MT_MASK) != L_PTE_MT_DEV_SHARED) + continue; + + map.pfn = pte_pfn(*pte); + map.type = MT_DEVICE; + map.length = PAGE_SIZE; + + create_mapping(&map); + } +} + /* * paging_init() sets up the page tables, initialises the zone memory * maps, and sets up the zero page, bad page and bad page tables. @@ -1495,6 +1568,7 @@ void __init paging_init(const struct machine_desc *mdesc) prepare_page_table(); map_lowmem(); dma_contiguous_remap(); + early_fixmap_shutdown(); devicemaps_init(mdesc); kmap_init(); tcm_init(); -- cgit From da4f295b4aeca0bca4ab406e70fb7087f93ede39 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 17 Aug 2015 03:45:35 +0100 Subject: ARM: 8416/1: Feroceon: use of_iomap() to map register base The chain of of_address_to_resource() and ioremap() can be replaced with of_iomap(). Signed-off-by: Masahiro Yamada Acked-by: Olof Johansson Signed-off-by: Russell King --- arch/arm/mm/cache-feroceon-l2.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/arm/mm/cache-feroceon-l2.c b/arch/arm/mm/cache-feroceon-l2.c index 097181e08c25..5c1b7a7b9af6 100644 --- a/arch/arm/mm/cache-feroceon-l2.c +++ b/arch/arm/mm/cache-feroceon-l2.c @@ -368,7 +368,6 @@ int __init feroceon_of_init(void) struct device_node *node; void __iomem *base; bool l2_wt_override = false; - struct resource res; #if defined(CONFIG_CACHE_FEROCEON_L2_WRITETHROUGH) l2_wt_override = true; @@ -376,10 +375,7 @@ int __init feroceon_of_init(void) node = of_find_matching_node(NULL, feroceon_ids); if (node && of_device_is_compatible(node, "marvell,kirkwood-cache")) { - if (of_address_to_resource(node, 0, &res)) - return -ENODEV; - - base = ioremap(res.start, resource_size(&res)); + base = of_iomap(node, 0); if (!base) return -ENOMEM; -- cgit From 8901925d32232adb57ba1b4c26d0c0f9d521a78c Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 17 Aug 2015 03:59:52 +0100 Subject: ARM: 8417/1: refactor bitops functions with BIT_MASK() and BIT_WORD() Use BIT_MASK() and BIT_WORD() rather than hard-coding the size of the "long" type. Signed-off-by: Masahiro Yamada Signed-off-by: Russell King --- arch/arm/include/asm/bitops.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/arm/include/asm/bitops.h b/arch/arm/include/asm/bitops.h index 56380995f4c3..e943e6cee254 100644 --- a/arch/arm/include/asm/bitops.h +++ b/arch/arm/include/asm/bitops.h @@ -35,9 +35,9 @@ static inline void ____atomic_set_bit(unsigned int bit, volatile unsigned long *p) { unsigned long flags; - unsigned long mask = 1UL << (bit & 31); + unsigned long mask = BIT_MASK(bit); - p += bit >> 5; + p += BIT_WORD(bit); raw_local_irq_save(flags); *p |= mask; @@ -47,9 +47,9 @@ static inline void ____atomic_set_bit(unsigned int bit, volatile unsigned long * static inline void ____atomic_clear_bit(unsigned int bit, volatile unsigned long *p) { unsigned long flags; - unsigned long mask = 1UL << (bit & 31); + unsigned long mask = BIT_MASK(bit); - p += bit >> 5; + p += BIT_WORD(bit); raw_local_irq_save(flags); *p &= ~mask; @@ -59,9 +59,9 @@ static inline void ____atomic_clear_bit(unsigned int bit, volatile unsigned long static inline void ____atomic_change_bit(unsigned int bit, volatile unsigned long *p) { unsigned long flags; - unsigned long mask = 1UL << (bit & 31); + unsigned long mask = BIT_MASK(bit); - p += bit >> 5; + p += BIT_WORD(bit); raw_local_irq_save(flags); *p ^= mask; @@ -73,9 +73,9 @@ ____atomic_test_and_set_bit(unsigned int bit, volatile unsigned long *p) { unsigned long flags; unsigned int res; - unsigned long mask = 1UL << (bit & 31); + unsigned long mask = BIT_MASK(bit); - p += bit >> 5; + p += BIT_WORD(bit); raw_local_irq_save(flags); res = *p; @@ -90,9 +90,9 @@ ____atomic_test_and_clear_bit(unsigned int bit, volatile unsigned long *p) { unsigned long flags; unsigned int res; - unsigned long mask = 1UL << (bit & 31); + unsigned long mask = BIT_MASK(bit); - p += bit >> 5; + p += BIT_WORD(bit); raw_local_irq_save(flags); res = *p; @@ -107,9 +107,9 @@ ____atomic_test_and_change_bit(unsigned int bit, volatile unsigned long *p) { unsigned long flags; unsigned int res; - unsigned long mask = 1UL << (bit & 31); + unsigned long mask = BIT_MASK(bit); - p += bit >> 5; + p += BIT_WORD(bit); raw_local_irq_save(flags); res = *p; -- cgit From 96231b2686b53f71838a335bdc404cb5285d1a01 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Tue, 18 Aug 2015 09:14:15 +0100 Subject: ARM: 8419/1: dma-mapping: harmonize definition of DMA_ERROR_CODE All architectures except arm that define DMA_ERROR_CODE are casting it to (dma_addr_t) - as it is always compared to dma_addr_t in arm as well this could be harmonized. Signed-off-by: Nicholas Mc Guire Acked-by: Marek Szyprowski Signed-off-by: Russell King --- arch/arm/include/asm/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index b52101d37ec7..a68b9d8a71fe 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -14,7 +14,7 @@ #include #include -#define DMA_ERROR_CODE (~0) +#define DMA_ERROR_CODE (~(dma_addr_t)0x0) extern struct dma_map_ops arm_dma_ops; extern struct dma_map_ops arm_coherent_dma_ops; -- cgit From 1eef5d2f1b461c120bcd82077edee5ec706ac53b Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 19 Aug 2015 21:23:48 +0100 Subject: ARM: domains: switch to keeping domain value in register Rather than modifying both the domain access control register and our per-thread copy, modify only the domain access control register, and use the per-thread copy to save and restore the register over context switches. We can also avoid the explicit initialisation of the init thread_info structure. This allows us to avoid needing to gain access to the thread information at the uaccess control sites. Signed-off-by: Russell King --- arch/arm/include/asm/domain.h | 20 +++++++++++++++----- arch/arm/include/asm/thread_info.h | 3 --- arch/arm/kernel/entry-armv.S | 2 ++ arch/arm/kernel/process.c | 13 ++++++++++--- 4 files changed, 27 insertions(+), 11 deletions(-) diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 6ddbe446425e..7f2941905714 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -59,6 +59,17 @@ #ifndef __ASSEMBLY__ +static inline unsigned int get_domain(void) +{ + unsigned int domain; + + asm( + "mrc p15, 0, %0, c3, c0 @ get domain" + : "=r" (domain)); + + return domain; +} + #ifdef CONFIG_CPU_USE_DOMAINS static inline void set_domain(unsigned val) { @@ -70,11 +81,10 @@ static inline void set_domain(unsigned val) #define modify_domain(dom,type) \ do { \ - struct thread_info *thread = current_thread_info(); \ - unsigned int domain = thread->cpu_domain; \ - domain &= ~domain_val(dom, DOMAIN_MANAGER); \ - thread->cpu_domain = domain | domain_val(dom, type); \ - set_domain(thread->cpu_domain); \ + unsigned int domain = get_domain(); \ + domain &= ~domain_val(dom, DOMAIN_MANAGER); \ + domain = domain | domain_val(dom, type); \ + set_domain(domain); \ } while (0) #else diff --git a/arch/arm/include/asm/thread_info.h b/arch/arm/include/asm/thread_info.h index bd32eded3e50..0a0aec410d8c 100644 --- a/arch/arm/include/asm/thread_info.h +++ b/arch/arm/include/asm/thread_info.h @@ -74,9 +74,6 @@ struct thread_info { .flags = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .cpu_domain = domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \ - domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ - domain_val(DOMAIN_IO, DOMAIN_CLIENT), \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 7dac3086e361..d19adcf6c580 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -770,6 +770,8 @@ ENTRY(__switch_to) ldr r4, [r2, #TI_TP_VALUE] ldr r5, [r2, #TI_TP_VALUE + 4] #ifdef CONFIG_CPU_USE_DOMAINS + mrc p15, 0, r6, c3, c0, 0 @ Get domain register + str r6, [r1, #TI_CPU_DOMAIN] @ Save old domain register ldr r6, [r2, #TI_CPU_DOMAIN] #endif switch_tls r1, r4, r5, r3, r7 diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index f192a2a41719..e722f9b3c9b1 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -146,10 +146,9 @@ void __show_regs(struct pt_regs *regs) buf[0] = '\0'; #ifdef CONFIG_CPU_CP15_MMU { - unsigned int transbase, dac; + unsigned int transbase, dac = get_domain(); asm("mrc p15, 0, %0, c2, c0\n\t" - "mrc p15, 0, %1, c3, c0\n" - : "=r" (transbase), "=r" (dac)); + : "=r" (transbase)); snprintf(buf, sizeof(buf), " Table: %08x DAC: %08x", transbase, dac); } @@ -210,6 +209,14 @@ copy_thread(unsigned long clone_flags, unsigned long stack_start, memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save)); + /* + * Copy the initial value of the domain access control register + * from the current thread: thread->addr_limit will have been + * copied from the current thread via setup_thread_stack() in + * kernel/fork.c + */ + thread->cpu_domain = get_domain(); + if (likely(!(p->flags & PF_KTHREAD))) { *childregs = *current_pt_regs(); childregs->ARM_r0 = 0; -- cgit From 8e798706f7e9cd7f096aa194de90269dde83773e Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 19 Aug 2015 22:36:24 +0100 Subject: ARM: domains: provide domain_mask() Provide a macro to generate the mask for a domain, rather than using domain_val(, DOMAIN_MANAGER) which won't work when CPU_USE_DOMAINS is turned off. Signed-off-by: Russell King --- arch/arm/include/asm/domain.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 7f2941905714..045b9b453bcd 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -55,7 +55,8 @@ #define DOMAIN_MANAGER 1 #endif -#define domain_val(dom,type) ((type) << (2*(dom))) +#define domain_mask(dom) ((3) << (2 * (dom))) +#define domain_val(dom,type) ((type) << (2 * (dom))) #ifndef __ASSEMBLY__ @@ -82,7 +83,7 @@ static inline void set_domain(unsigned val) #define modify_domain(dom,type) \ do { \ unsigned int domain = get_domain(); \ - domain &= ~domain_val(dom, DOMAIN_MANAGER); \ + domain &= ~domain_mask(dom); \ domain = domain | domain_val(dom, type); \ set_domain(domain); \ } while (0) -- cgit From 0171356a7708af01ad3224702b7f0aaa5b7a1399 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 21 Aug 2015 09:23:26 +0100 Subject: ARM: domains: move initial domain setting value to asm/domains.h Signed-off-by: Russell King --- arch/arm/include/asm/domain.h | 6 ++++++ arch/arm/kernel/head.S | 5 +---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 045b9b453bcd..4218f88e8f7e 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -58,6 +58,12 @@ #define domain_mask(dom) ((3) << (2 * (dom))) #define domain_val(dom,type) ((type) << (2 * (dom))) +#define DACR_INIT \ + (domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \ + domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ + domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ + domain_val(DOMAIN_IO, DOMAIN_CLIENT)) + #ifndef __ASSEMBLY__ static inline unsigned int get_domain(void) diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index bd755d97e459..d56e5e9a9e1e 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -461,10 +461,7 @@ __enable_mmu: #ifdef CONFIG_ARM_LPAE mcrr p15, 0, r4, r5, c2 @ load TTBR0 #else - mov r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \ - domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ - domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ - domain_val(DOMAIN_IO, DOMAIN_CLIENT)) + mov r5, #DACR_INIT mcr p15, 0, r5, c3, c0, 0 @ load domain access register mcr p15, 0, r4, c2, c0, 0 @ load page table pointer #endif -- cgit From 3c2aed5b28819564e1a07b4686bd89802bcc4d6b Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 21 Aug 2015 09:30:16 +0100 Subject: ARM: domains: get rid of manager mode for user domain Since we switched to early trap initialisation in 94e5a85b3be0 ("ARM: earlier initialization of vectors page") we haven't been writing directly to the vectors page, and so there's no need for this domain to be in manager mode. Switch it to client mode. Signed-off-by: Russell King --- arch/arm/include/asm/domain.h | 2 +- arch/arm/kernel/traps.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 4218f88e8f7e..08b601e69ddc 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -59,7 +59,7 @@ #define domain_val(dom,type) ((type) << (2 * (dom))) #define DACR_INIT \ - (domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \ + (domain_val(DOMAIN_USER, DOMAIN_CLIENT) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ domain_val(DOMAIN_IO, DOMAIN_CLIENT)) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index d358226236f2..969f9d9e665f 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -870,7 +870,6 @@ void __init early_trap_init(void *vectors_base) kuser_init(vectors_base); flush_icache_range(vectors, vectors + PAGE_SIZE * 2); - modify_domain(DOMAIN_USER, DOMAIN_CLIENT); #else /* ifndef CONFIG_CPU_V7M */ /* * on V7-M there is no need to copy the vector table to a dedicated -- cgit From a02d8dfd54cdf3b1b0464ccc2c1c4afe2c003a35 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 21 Aug 2015 09:38:31 +0100 Subject: ARM: domains: keep vectors in separate domain Keep the machine vectors in its own domain to avoid software based user access control from making the vector code inaccessible, and thereby deadlocking the machine. Signed-off-by: Russell King --- arch/arm/include/asm/domain.h | 4 +++- arch/arm/include/asm/pgtable-2level-hwdef.h | 1 + arch/arm/mm/mmu.c | 4 ++-- arch/arm/mm/pgd.c | 10 ++++++++++ 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 08b601e69ddc..396a12e486fe 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -43,6 +43,7 @@ #define DOMAIN_USER 1 #define DOMAIN_IO 0 #endif +#define DOMAIN_VECTORS 3 /* * Domain types @@ -62,7 +63,8 @@ (domain_val(DOMAIN_USER, DOMAIN_CLIENT) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ - domain_val(DOMAIN_IO, DOMAIN_CLIENT)) + domain_val(DOMAIN_IO, DOMAIN_CLIENT) | \ + domain_val(DOMAIN_VECTORS, DOMAIN_CLIENT)) #ifndef __ASSEMBLY__ diff --git a/arch/arm/include/asm/pgtable-2level-hwdef.h b/arch/arm/include/asm/pgtable-2level-hwdef.h index 5e68278e953e..d0131ee6f6af 100644 --- a/arch/arm/include/asm/pgtable-2level-hwdef.h +++ b/arch/arm/include/asm/pgtable-2level-hwdef.h @@ -23,6 +23,7 @@ #define PMD_PXNTABLE (_AT(pmdval_t, 1) << 2) /* v7 */ #define PMD_BIT4 (_AT(pmdval_t, 1) << 4) #define PMD_DOMAIN(x) (_AT(pmdval_t, (x)) << 5) +#define PMD_DOMAIN_MASK PMD_DOMAIN(0x0f) #define PMD_PROTECTION (_AT(pmdval_t, 1) << 9) /* v5 */ /* * - section diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 6ca7d9aa896f..a016de248034 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -291,13 +291,13 @@ static struct mem_type mem_types[] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_RDONLY, .prot_l1 = PMD_TYPE_TABLE, - .domain = DOMAIN_USER, + .domain = DOMAIN_VECTORS, }, [MT_HIGH_VECTORS] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY | L_PTE_USER | L_PTE_RDONLY, .prot_l1 = PMD_TYPE_TABLE, - .domain = DOMAIN_USER, + .domain = DOMAIN_VECTORS, }, [MT_MEMORY_RWX] = { .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY, diff --git a/arch/arm/mm/pgd.c b/arch/arm/mm/pgd.c index a3681f11dd9f..e683db1b90a3 100644 --- a/arch/arm/mm/pgd.c +++ b/arch/arm/mm/pgd.c @@ -84,6 +84,16 @@ pgd_t *pgd_alloc(struct mm_struct *mm) if (!new_pte) goto no_pte; +#ifndef CONFIG_ARM_LPAE + /* + * Modify the PTE pointer to have the correct domain. This + * needs to be the vectors domain to avoid the low vectors + * being unmapped. + */ + pmd_val(*new_pmd) &= ~PMD_DOMAIN_MASK; + pmd_val(*new_pmd) |= PMD_DOMAIN(DOMAIN_VECTORS); +#endif + init_pud = pud_offset(init_pgd, 0); init_pmd = pmd_offset(init_pud, 0); init_pte = pte_offset_map(init_pmd, 0); -- cgit From 1fb6755f16872ad256c18cce2830f9087502dffd Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 21 Aug 2015 09:42:10 +0100 Subject: ARM: domains: remove DOMAIN_TABLE DOMAIN_TABLE is not used; in any case, it aliases to the kernel domain. Remove this definition. Signed-off-by: Russell King --- arch/arm/include/asm/domain.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 396a12e486fe..2be929549938 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -34,12 +34,10 @@ */ #ifndef CONFIG_IO_36 #define DOMAIN_KERNEL 0 -#define DOMAIN_TABLE 0 #define DOMAIN_USER 1 #define DOMAIN_IO 2 #else #define DOMAIN_KERNEL 2 -#define DOMAIN_TABLE 2 #define DOMAIN_USER 1 #define DOMAIN_IO 0 #endif @@ -62,7 +60,6 @@ #define DACR_INIT \ (domain_val(DOMAIN_USER, DOMAIN_CLIENT) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ - domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ domain_val(DOMAIN_IO, DOMAIN_CLIENT) | \ domain_val(DOMAIN_VECTORS, DOMAIN_CLIENT)) -- cgit From 08446b129bbde34665c423d882f857a45b8c3aed Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 25 Aug 2015 14:59:15 +0100 Subject: ARM: mm: improve do_ldrd_abort macro Improve the do_ldrd_abort macro code - firstly, it inefficiently checks for the LDRD encoding by doing a multi-stage test of various bits. This can be simplified by generating a mask, bitmasking the instruction and then comparing the result. Secondly, we want to be able to test the result rather than branching to do_DataAbort, so remove the branch at the end and rename the macro to 'teq_ldrd' to reflect it's new usage. teq_ldrd macro returns 'eq' if the instruction was a LDRD. Signed-off-by: Russell King --- arch/arm/mm/abort-ev5t.S | 3 ++- arch/arm/mm/abort-ev5tj.S | 3 ++- arch/arm/mm/abort-ev6.S | 3 ++- arch/arm/mm/abort-macro.S | 13 +++++-------- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/arm/mm/abort-ev5t.S b/arch/arm/mm/abort-ev5t.S index a0908d4653a3..c913031b79cc 100644 --- a/arch/arm/mm/abort-ev5t.S +++ b/arch/arm/mm/abort-ev5t.S @@ -22,7 +22,8 @@ ENTRY(v5t_early_abort) do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 ldreq r3, [r4] @ read aborted ARM instruction bic r1, r1, #1 << 11 @ clear bits 11 of FSR - do_ldrd_abort tmp=ip, insn=r3 + teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? + beq do_DataAbort @ yes tst r3, #1 << 20 @ check write orreq r1, r1, #1 << 11 b do_DataAbort diff --git a/arch/arm/mm/abort-ev5tj.S b/arch/arm/mm/abort-ev5tj.S index 4006b7a61264..1b80d71adb0f 100644 --- a/arch/arm/mm/abort-ev5tj.S +++ b/arch/arm/mm/abort-ev5tj.S @@ -24,7 +24,8 @@ ENTRY(v5tj_early_abort) bne do_DataAbort do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 ldreq r3, [r4] @ read aborted ARM instruction - do_ldrd_abort tmp=ip, insn=r3 + teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? + beq do_DataAbort @ yes tst r3, #1 << 20 @ L = 0 -> write orreq r1, r1, #1 << 11 @ yes. b do_DataAbort diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S index 8c48c5c22a33..113704f30e9f 100644 --- a/arch/arm/mm/abort-ev6.S +++ b/arch/arm/mm/abort-ev6.S @@ -34,7 +34,8 @@ ENTRY(v6_early_abort) ldr r3, [r4] @ read aborted ARM instruction ARM_BE8(rev r3, r3) - do_ldrd_abort tmp=ip, insn=r3 + teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? + beq do_DataAbort @ yes tst r3, #1 << 20 @ L = 0 -> write orreq r1, r1, #1 << 11 @ yes. #endif diff --git a/arch/arm/mm/abort-macro.S b/arch/arm/mm/abort-macro.S index 2cbf68ef0e83..50d6c0a900b1 100644 --- a/arch/arm/mm/abort-macro.S +++ b/arch/arm/mm/abort-macro.S @@ -29,12 +29,9 @@ not_thumb: * [7:4] == 1101 * [20] == 0 */ - .macro do_ldrd_abort, tmp, insn - tst \insn, #0x0e100000 @ [27:25,20] == 0 - bne not_ldrd - and \tmp, \insn, #0x000000f0 @ [7:4] == 1101 - cmp \tmp, #0x000000d0 - beq do_DataAbort -not_ldrd: + .macro teq_ldrd, tmp, insn + mov \tmp, #0x0e100000 + orr \tmp, #0x000000f0 + and \tmp, \insn, \tmp + teq \tmp, #0x000000d0 .endm - -- cgit From 3fba7e23f754a9a6e639b640fa2a393712ffe1b8 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 19 Aug 2015 11:02:28 +0100 Subject: ARM: uaccess: provide uaccess_save_and_enable() and uaccess_restore() Provide uaccess_save_and_enable() and uaccess_restore() to permit control of userspace visibility to the kernel, and hook these into the appropriate places in the kernel where we need to access userspace. Signed-off-by: Russell King --- arch/arm/include/asm/futex.h | 19 ++++++++-- arch/arm/include/asm/uaccess.h | 71 +++++++++++++++++++++++++++++++++++--- arch/arm/kernel/armksyms.c | 6 ++-- arch/arm/lib/clear_user.S | 6 ++-- arch/arm/lib/copy_from_user.S | 6 ++-- arch/arm/lib/copy_to_user.S | 6 ++-- arch/arm/lib/uaccess_with_memcpy.c | 4 +-- 7 files changed, 97 insertions(+), 21 deletions(-) diff --git a/arch/arm/include/asm/futex.h b/arch/arm/include/asm/futex.h index 5eed82809d82..6795368ad023 100644 --- a/arch/arm/include/asm/futex.h +++ b/arch/arm/include/asm/futex.h @@ -22,8 +22,11 @@ #ifdef CONFIG_SMP #define __futex_atomic_op(insn, ret, oldval, tmp, uaddr, oparg) \ +({ \ + unsigned int __ua_flags; \ smp_mb(); \ prefetchw(uaddr); \ + __ua_flags = uaccess_save_and_enable(); \ __asm__ __volatile__( \ "1: ldrex %1, [%3]\n" \ " " insn "\n" \ @@ -34,12 +37,15 @@ __futex_atomic_ex_table("%5") \ : "=&r" (ret), "=&r" (oldval), "=&r" (tmp) \ : "r" (uaddr), "r" (oparg), "Ir" (-EFAULT) \ - : "cc", "memory") + : "cc", "memory"); \ + uaccess_restore(__ua_flags); \ +}) static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { + unsigned int __ua_flags; int ret; u32 val; @@ -49,6 +55,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, smp_mb(); /* Prefetching cannot fault */ prefetchw(uaddr); + __ua_flags = uaccess_save_and_enable(); __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" "1: ldrex %1, [%4]\n" " teq %1, %2\n" @@ -61,6 +68,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "=&r" (ret), "=&r" (val) : "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT) : "cc", "memory"); + uaccess_restore(__ua_flags); smp_mb(); *uval = val; @@ -73,6 +81,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, #include #define __futex_atomic_op(insn, ret, oldval, tmp, uaddr, oparg) \ +({ \ + unsigned int __ua_flags = uaccess_save_and_enable(); \ __asm__ __volatile__( \ "1: " TUSER(ldr) " %1, [%3]\n" \ " " insn "\n" \ @@ -81,12 +91,15 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, __futex_atomic_ex_table("%5") \ : "=&r" (ret), "=&r" (oldval), "=&r" (tmp) \ : "r" (uaddr), "r" (oparg), "Ir" (-EFAULT) \ - : "cc", "memory") + : "cc", "memory"); \ + uaccess_restore(__ua_flags); \ +}) static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newval) { + unsigned int __ua_flags; int ret = 0; u32 val; @@ -94,6 +107,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, return -EFAULT; preempt_disable(); + __ua_flags = uaccess_save_and_enable(); __asm__ __volatile__("@futex_atomic_cmpxchg_inatomic\n" "1: " TUSER(ldr) " %1, [%4]\n" " teq %1, %2\n" @@ -103,6 +117,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, : "+r" (ret), "=&r" (val) : "r" (oldval), "r" (newval), "r" (uaddr), "Ir" (-EFAULT) : "cc", "memory"); + uaccess_restore(__ua_flags); *uval = val; preempt_enable(); diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 74b17d09ef7a..82880132f941 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -49,6 +49,21 @@ struct exception_table_entry extern int fixup_exception(struct pt_regs *regs); +/* + * These two functions allow hooking accesses to userspace to increase + * system integrity by ensuring that the kernel can not inadvertantly + * perform such accesses (eg, via list poison values) which could then + * be exploited for priviledge escalation. + */ +static inline unsigned int uaccess_save_and_enable(void) +{ + return 0; +} + +static inline void uaccess_restore(unsigned int flags) +{ +} + /* * These two are intentionally not defined anywhere - if the kernel * code generates any references to them, that's a bug. @@ -165,6 +180,7 @@ extern int __get_user_64t_4(void *); register typeof(x) __r2 asm("r2"); \ register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ + unsigned int __ua_flags = uaccess_save_and_enable(); \ switch (sizeof(*(__p))) { \ case 1: \ if (sizeof((x)) >= 8) \ @@ -192,6 +208,7 @@ extern int __get_user_64t_4(void *); break; \ default: __e = __get_user_bad(); break; \ } \ + uaccess_restore(__ua_flags); \ x = (typeof(*(p))) __r2; \ __e; \ }) @@ -224,6 +241,7 @@ extern int __put_user_8(void *, unsigned long long); register const typeof(*(p)) __user *__p asm("r0") = __tmp_p; \ register unsigned long __l asm("r1") = __limit; \ register int __e asm("r0"); \ + unsigned int __ua_flags = uaccess_save_and_enable(); \ switch (sizeof(*(__p))) { \ case 1: \ __put_user_x(__r2, __p, __e, __l, 1); \ @@ -239,6 +257,7 @@ extern int __put_user_8(void *, unsigned long long); break; \ default: __e = __put_user_bad(); break; \ } \ + uaccess_restore(__ua_flags); \ __e; \ }) @@ -300,14 +319,17 @@ static inline void set_fs(mm_segment_t fs) do { \ unsigned long __gu_addr = (unsigned long)(ptr); \ unsigned long __gu_val; \ + unsigned int __ua_flags; \ __chk_user_ptr(ptr); \ might_fault(); \ + __ua_flags = uaccess_save_and_enable(); \ switch (sizeof(*(ptr))) { \ case 1: __get_user_asm_byte(__gu_val, __gu_addr, err); break; \ case 2: __get_user_asm_half(__gu_val, __gu_addr, err); break; \ case 4: __get_user_asm_word(__gu_val, __gu_addr, err); break; \ default: (__gu_val) = __get_user_bad(); \ } \ + uaccess_restore(__ua_flags); \ (x) = (__typeof__(*(ptr)))__gu_val; \ } while (0) @@ -381,9 +403,11 @@ do { \ #define __put_user_err(x, ptr, err) \ do { \ unsigned long __pu_addr = (unsigned long)(ptr); \ + unsigned int __ua_flags; \ __typeof__(*(ptr)) __pu_val = (x); \ __chk_user_ptr(ptr); \ might_fault(); \ + __ua_flags = uaccess_save_and_enable(); \ switch (sizeof(*(ptr))) { \ case 1: __put_user_asm_byte(__pu_val, __pu_addr, err); break; \ case 2: __put_user_asm_half(__pu_val, __pu_addr, err); break; \ @@ -391,6 +415,7 @@ do { \ case 8: __put_user_asm_dword(__pu_val, __pu_addr, err); break; \ default: __put_user_bad(); \ } \ + uaccess_restore(__ua_flags); \ } while (0) #define __put_user_asm_byte(x, __pu_addr, err) \ @@ -474,11 +499,46 @@ do { \ #ifdef CONFIG_MMU -extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n); -extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n); -extern unsigned long __must_check __copy_to_user_std(void __user *to, const void *from, unsigned long n); -extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); -extern unsigned long __must_check __clear_user_std(void __user *addr, unsigned long n); +extern unsigned long __must_check +arm_copy_from_user(void *to, const void __user *from, unsigned long n); + +static inline unsigned long __must_check +__copy_from_user(void *to, const void __user *from, unsigned long n) +{ + unsigned int __ua_flags = uaccess_save_and_enable(); + n = arm_copy_from_user(to, from, n); + uaccess_restore(__ua_flags); + return n; +} + +extern unsigned long __must_check +arm_copy_to_user(void __user *to, const void *from, unsigned long n); +extern unsigned long __must_check +__copy_to_user_std(void __user *to, const void *from, unsigned long n); + +static inline unsigned long __must_check +__copy_to_user(void __user *to, const void *from, unsigned long n) +{ + unsigned int __ua_flags = uaccess_save_and_enable(); + n = arm_copy_to_user(to, from, n); + uaccess_restore(__ua_flags); + return n; +} + +extern unsigned long __must_check +arm_clear_user(void __user *addr, unsigned long n); +extern unsigned long __must_check +__clear_user_std(void __user *addr, unsigned long n); + +static inline unsigned long __must_check +__clear_user(void __user *addr, unsigned long n) +{ + unsigned int __ua_flags = uaccess_save_and_enable(); + n = arm_clear_user(addr, n); + uaccess_restore(__ua_flags); + return n; +} + #else #define __copy_from_user(to, from, n) (memcpy(to, (void __force *)from, n), 0) #define __copy_to_user(to, from, n) (memcpy((void __force *)to, from, n), 0) @@ -511,6 +571,7 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo return n; } +/* These are from lib/ code, and use __get_user() and friends */ extern long strncpy_from_user(char *dest, const char __user *src, long count); extern __must_check long strlen_user(const char __user *str); diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index a88671cfe1ff..a35d72d30b56 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -91,9 +91,9 @@ EXPORT_SYMBOL(__memzero); #ifdef CONFIG_MMU EXPORT_SYMBOL(copy_page); -EXPORT_SYMBOL(__copy_from_user); -EXPORT_SYMBOL(__copy_to_user); -EXPORT_SYMBOL(__clear_user); +EXPORT_SYMBOL(arm_copy_from_user); +EXPORT_SYMBOL(arm_copy_to_user); +EXPORT_SYMBOL(arm_clear_user); EXPORT_SYMBOL(__get_user_1); EXPORT_SYMBOL(__get_user_2); diff --git a/arch/arm/lib/clear_user.S b/arch/arm/lib/clear_user.S index 1710fd7db2d5..970d6c043774 100644 --- a/arch/arm/lib/clear_user.S +++ b/arch/arm/lib/clear_user.S @@ -12,14 +12,14 @@ .text -/* Prototype: int __clear_user(void *addr, size_t sz) +/* Prototype: unsigned long arm_clear_user(void *addr, size_t sz) * Purpose : clear some user memory * Params : addr - user memory address to clear * : sz - number of bytes to clear * Returns : number of bytes NOT cleared */ ENTRY(__clear_user_std) -WEAK(__clear_user) +WEAK(arm_clear_user) stmfd sp!, {r1, lr} mov r2, #0 cmp r1, #4 @@ -44,7 +44,7 @@ WEAK(__clear_user) USER( strnebt r2, [r0]) mov r0, #0 ldmfd sp!, {r1, pc} -ENDPROC(__clear_user) +ENDPROC(arm_clear_user) ENDPROC(__clear_user_std) .pushsection .text.fixup,"ax" diff --git a/arch/arm/lib/copy_from_user.S b/arch/arm/lib/copy_from_user.S index 7a235b9952be..1512bebfbf1b 100644 --- a/arch/arm/lib/copy_from_user.S +++ b/arch/arm/lib/copy_from_user.S @@ -17,7 +17,7 @@ /* * Prototype: * - * size_t __copy_from_user(void *to, const void *from, size_t n) + * size_t arm_copy_from_user(void *to, const void *from, size_t n) * * Purpose: * @@ -89,11 +89,11 @@ .text -ENTRY(__copy_from_user) +ENTRY(arm_copy_from_user) #include "copy_template.S" -ENDPROC(__copy_from_user) +ENDPROC(arm_copy_from_user) .pushsection .fixup,"ax" .align 0 diff --git a/arch/arm/lib/copy_to_user.S b/arch/arm/lib/copy_to_user.S index 9648b0675a3e..caf5019d8161 100644 --- a/arch/arm/lib/copy_to_user.S +++ b/arch/arm/lib/copy_to_user.S @@ -17,7 +17,7 @@ /* * Prototype: * - * size_t __copy_to_user(void *to, const void *from, size_t n) + * size_t arm_copy_to_user(void *to, const void *from, size_t n) * * Purpose: * @@ -93,11 +93,11 @@ .text ENTRY(__copy_to_user_std) -WEAK(__copy_to_user) +WEAK(arm_copy_to_user) #include "copy_template.S" -ENDPROC(__copy_to_user) +ENDPROC(arm_copy_to_user) ENDPROC(__copy_to_user_std) .pushsection .text.fixup,"ax" diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c index 3e58d710013c..77f020e75ccd 100644 --- a/arch/arm/lib/uaccess_with_memcpy.c +++ b/arch/arm/lib/uaccess_with_memcpy.c @@ -136,7 +136,7 @@ out: } unsigned long -__copy_to_user(void __user *to, const void *from, unsigned long n) +arm_copy_to_user(void __user *to, const void *from, unsigned long n) { /* * This test is stubbed out of the main function above to keep @@ -190,7 +190,7 @@ out: return n; } -unsigned long __clear_user(void __user *addr, unsigned long n) +unsigned long arm_clear_user(void __user *addr, unsigned long n) { /* See rational for this in __copy_to_user() above. */ if (n < 64) -- cgit From 9205b797dbe519a629267ec8c5766cd973d35063 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 24 Aug 2015 21:49:30 +0100 Subject: ARM: 8421/1: smp: Collapse arch_cpu_idle_dead() into cpu_die() The only caller of cpu_die() on ARM is arch_cpu_idle_dead(), so let's simplify the code by renaming cpu_die() to arch_cpu_idle_dead(). While were here, drop the __ref annotation because __cpuinit is gone nowadays. Signed-off-by: Stephen Boyd Signed-off-by: Russell King --- arch/arm/include/asm/smp.h | 1 - arch/arm/kernel/process.c | 7 ------- arch/arm/kernel/smp.c | 2 +- 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h index 318ce89eeff7..ef356659b4f4 100644 --- a/arch/arm/include/asm/smp.h +++ b/arch/arm/include/asm/smp.h @@ -74,7 +74,6 @@ extern void secondary_startup_arm(void); extern int __cpu_disable(void); extern void __cpu_die(unsigned int cpu); -extern void cpu_die(void); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index f192a2a41719..358984b7f249 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -91,13 +91,6 @@ void arch_cpu_idle_exit(void) ledtrig_cpu(CPU_LED_IDLE_END); } -#ifdef CONFIG_HOTPLUG_CPU -void arch_cpu_idle_dead(void) -{ - cpu_die(); -} -#endif - void __show_regs(struct pt_regs *regs) { unsigned long flags; diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 3cd846f48eaf..0aad7cdf2e58 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -266,7 +266,7 @@ void __cpu_die(unsigned int cpu) * of the other hotplug-cpu capable cores, so presumably coming * out of idle fixes this. */ -void __ref cpu_die(void) +void arch_cpu_idle_dead(void) { unsigned int cpu = smp_processor_id(); -- cgit From aa06e5c1f9c2b466712be904cc5b56a813e24cfd Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 26 Aug 2015 20:07:25 +0100 Subject: ARM: entry: get rid of multiple macro definitions The following structure is just asking for trouble: #ifdef CONFIG_symbol .macro foo ... .endm .macro bar ... .endm .macro baz ... .endm #else .macro foo ... .endm .macro bar ... .endm #ifdef CONFIG_symbol2 .macro baz ... .endm #else .macro baz ... .endm #endif #endif such as one defintion being updated, but the other definitions miss out. Where the contents of a macro needs to be conditional, the hint is in the first clause of this very sentence. "contents" "conditional". Not multiple separate definitions, especially not when much of the macro is the same between different configs. This patch fixes this bad style, which had caused the Thumb2 code to miss-out on the uaccess updates. Signed-off-by: Russell King --- arch/arm/kernel/entry-header.S | 109 +++++++++++++++++------------------------ 1 file changed, 44 insertions(+), 65 deletions(-) diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index 1a0045abead7..d47b5161b029 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -196,7 +196,7 @@ msr cpsr_c, \rtemp @ switch back to the SVC mode .endm -#ifndef CONFIG_THUMB2_KERNEL + .macro svc_exit, rpsr, irq = 0 .if \irq != 0 @ IRQs already off @@ -215,6 +215,9 @@ blne trace_hardirqs_off #endif .endif + +#ifndef CONFIG_THUMB2_KERNEL + @ ARM mode SVC restore msr spsr_cxsf, \rpsr #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_32v6K) @ We must avoid clrex due to Cortex-A15 erratum #830321 @@ -222,6 +225,20 @@ strex r1, r2, [r0] @ clear the exclusive monitor #endif ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr +#else + @ Thumb mode SVC restore + ldr lr, [sp, #S_SP] @ top of the stack + ldrd r0, r1, [sp, #S_LR] @ calling lr and pc + + @ We must avoid clrex due to Cortex-A15 erratum #830321 + strex r2, r1, [sp, #S_LR] @ clear the exclusive monitor + + stmdb lr!, {r0, r1, \rpsr} @ calling lr and rfe context + ldmia sp, {r0 - r12} + mov sp, lr + ldr lr, [sp], #4 + rfeia sp! +#endif .endm @ @@ -241,6 +258,8 @@ @ on the stack remains correct). @ .macro svc_exit_via_fiq +#ifndef CONFIG_THUMB2_KERNEL + @ ARM mode restore mov r0, sp ldmib r0, {r1 - r14} @ abort is deadly from here onward (it will @ clobber state restored below) @@ -250,9 +269,26 @@ msr spsr_cxsf, r9 ldr r0, [r0, #S_R0] ldmia r8, {pc}^ +#else + @ Thumb mode restore + add r0, sp, #S_R2 + ldr lr, [sp, #S_LR] + ldr sp, [sp, #S_SP] @ abort is deadly from here onward (it will + @ clobber state restored below) + ldmia r0, {r2 - r12} + mov r1, #FIQ_MODE | PSR_I_BIT | PSR_F_BIT + msr cpsr_c, r1 + sub r0, #S_R2 + add r8, r0, #S_PC + ldmia r0, {r0 - r1} + rfeia r8 +#endif .endm + .macro restore_user_regs, fast = 0, offset = 0 +#ifndef CONFIG_THUMB2_KERNEL + @ ARM mode restore mov r2, sp ldr r1, [r2, #\offset + S_PSR] @ get calling cpsr ldr lr, [r2, #\offset + S_PC]! @ get pc @@ -270,72 +306,16 @@ @ after ldm {}^ add sp, sp, #\offset + S_FRAME_SIZE movs pc, lr @ return & move spsr_svc into cpsr - .endm - -#else /* CONFIG_THUMB2_KERNEL */ - .macro svc_exit, rpsr, irq = 0 - .if \irq != 0 - @ IRQs already off -#ifdef CONFIG_TRACE_IRQFLAGS - @ The parent context IRQs must have been enabled to get here in - @ the first place, so there's no point checking the PSR I bit. - bl trace_hardirqs_on -#endif - .else - @ IRQs off again before pulling preserved data off the stack - disable_irq_notrace -#ifdef CONFIG_TRACE_IRQFLAGS - tst \rpsr, #PSR_I_BIT - bleq trace_hardirqs_on - tst \rpsr, #PSR_I_BIT - blne trace_hardirqs_off -#endif - .endif - ldr lr, [sp, #S_SP] @ top of the stack - ldrd r0, r1, [sp, #S_LR] @ calling lr and pc - - @ We must avoid clrex due to Cortex-A15 erratum #830321 - strex r2, r1, [sp, #S_LR] @ clear the exclusive monitor - - stmdb lr!, {r0, r1, \rpsr} @ calling lr and rfe context - ldmia sp, {r0 - r12} - mov sp, lr - ldr lr, [sp], #4 - rfeia sp! - .endm - - @ - @ svc_exit_via_fiq - like svc_exit but switches to FIQ mode before exit - @ - @ For full details see non-Thumb implementation above. - @ - .macro svc_exit_via_fiq - add r0, sp, #S_R2 - ldr lr, [sp, #S_LR] - ldr sp, [sp, #S_SP] @ abort is deadly from here onward (it will - @ clobber state restored below) - ldmia r0, {r2 - r12} - mov r1, #FIQ_MODE | PSR_I_BIT | PSR_F_BIT - msr cpsr_c, r1 - sub r0, #S_R2 - add r8, r0, #S_PC - ldmia r0, {r0 - r1} - rfeia r8 - .endm - -#ifdef CONFIG_CPU_V7M - /* - * Note we don't need to do clrex here as clearing the local monitor is - * part of each exception entry and exit sequence. - */ - .macro restore_user_regs, fast = 0, offset = 0 +#elif defined(CONFIG_CPU_V7M) + @ V7M restore. + @ Note that we don't need to do clrex here as clearing the local + @ monitor is part of the exception entry and exit sequence. .if \offset add sp, #\offset .endif v7m_exception_slow_exit ret_r0 = \fast - .endm -#else /* ifdef CONFIG_CPU_V7M */ - .macro restore_user_regs, fast = 0, offset = 0 +#else + @ Thumb mode restore mov r2, sp load_user_sp_lr r2, r3, \offset + S_SP @ calling sp, lr ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr @@ -353,9 +333,8 @@ .endif add sp, sp, #S_FRAME_SIZE - S_SP movs pc, lr @ return & move spsr_svc into cpsr - .endm -#endif /* ifdef CONFIG_CPU_V7M / else */ #endif /* !CONFIG_THUMB2_KERNEL */ + .endm /* * Context tracking subsystem. Used to instrument transitions -- cgit From 2190fed67ba6f3e8129513929f2395843645e928 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 20 Aug 2015 10:32:02 +0100 Subject: ARM: entry: provide uaccess assembly macro hooks Provide hooks into the kernel entry and exit paths to permit control of userspace visibility to the kernel. The intended use is: - on entry to kernel from user, uaccess_disable will be called to disable userspace visibility - on exit from kernel to user, uaccess_enable will be called to enable userspace visibility - on entry from a kernel exception, uaccess_save_and_disable will be called to save the current userspace visibility setting, and disable access - on exit from a kernel exception, uaccess_restore will be called to restore the userspace visibility as it was before the exception occurred. These hooks allows us to keep userspace visibility disabled for the vast majority of the kernel, except for localised regions where we want to explicitly access userspace. Signed-off-by: Russell King --- arch/arm/include/asm/assembler.h | 17 +++++++++++++++++ arch/arm/kernel/entry-armv.S | 30 ++++++++++++++++++++++-------- arch/arm/kernel/entry-common.S | 2 ++ arch/arm/kernel/entry-header.S | 3 +++ arch/arm/mm/abort-ev4.S | 1 + arch/arm/mm/abort-ev5t.S | 1 + arch/arm/mm/abort-ev5tj.S | 1 + arch/arm/mm/abort-ev6.S | 7 ++++--- arch/arm/mm/abort-ev7.S | 1 + arch/arm/mm/abort-lv4t.S | 2 ++ arch/arm/mm/abort-macro.S | 1 + 11 files changed, 55 insertions(+), 11 deletions(-) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 4abe57279c66..a91177043467 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -445,6 +445,23 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) #endif .endm + .macro uaccess_disable, tmp, isb=1 + .endm + + .macro uaccess_enable, tmp, isb=1 + .endm + + .macro uaccess_save, tmp + .endm + + .macro uaccess_restore + .endm + + .macro uaccess_save_and_disable, tmp + uaccess_save \tmp + uaccess_disable \tmp + .endm + .irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo .macro ret\c, reg #if __LINUX_ARM_ARCH__ < 6 diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index d19adcf6c580..61f00a3f3047 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -149,10 +149,10 @@ ENDPROC(__und_invalid) #define SPFIX(code...) #endif - .macro svc_entry, stack_hole=0, trace=1 + .macro svc_entry, stack_hole=0, trace=1, uaccess=1 UNWIND(.fnstart ) UNWIND(.save {r0 - pc} ) - sub sp, sp, #(S_FRAME_SIZE + \stack_hole - 4) + sub sp, sp, #(S_FRAME_SIZE + 8 + \stack_hole - 4) #ifdef CONFIG_THUMB2_KERNEL SPFIX( str r0, [sp] ) @ temporarily saved SPFIX( mov r0, sp ) @@ -167,7 +167,7 @@ ENDPROC(__und_invalid) ldmia r0, {r3 - r5} add r7, sp, #S_SP - 4 @ here for interlock avoidance mov r6, #-1 @ "" "" "" "" - add r2, sp, #(S_FRAME_SIZE + \stack_hole - 4) + add r2, sp, #(S_FRAME_SIZE + 8 + \stack_hole - 4) SPFIX( addeq r2, r2, #4 ) str r3, [sp, #-4]! @ save the "real" r0 copied @ from the exception stack @@ -185,6 +185,11 @@ ENDPROC(__und_invalid) @ stmia r7, {r2 - r6} + uaccess_save r0 + .if \uaccess + uaccess_disable r0 + .endif + .if \trace #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off @@ -194,7 +199,7 @@ ENDPROC(__und_invalid) .align 5 __dabt_svc: - svc_entry + svc_entry uaccess=0 mov r2, sp dabt_helper THUMB( ldr r5, [sp, #S_PSR] ) @ potentially updated CPSR @@ -368,7 +373,7 @@ ENDPROC(__fiq_abt) #error "sizeof(struct pt_regs) must be a multiple of 8" #endif - .macro usr_entry, trace=1 + .macro usr_entry, trace=1, uaccess=1 UNWIND(.fnstart ) UNWIND(.cantunwind ) @ don't unwind the user space sub sp, sp, #S_FRAME_SIZE @@ -400,6 +405,10 @@ ENDPROC(__fiq_abt) ARM( stmdb r0, {sp, lr}^ ) THUMB( store_user_sp_lr r0, r1, S_SP - S_PC ) + .if \uaccess + uaccess_disable ip + .endif + @ Enable the alignment trap while in kernel mode ATRAP( teq r8, r7) ATRAP( mcrne p15, 0, r8, c1, c0, 0) @@ -435,7 +444,7 @@ ENDPROC(__fiq_abt) .align 5 __dabt_usr: - usr_entry + usr_entry uaccess=0 kuser_cmpxchg_check mov r2, sp dabt_helper @@ -458,7 +467,7 @@ ENDPROC(__irq_usr) .align 5 __und_usr: - usr_entry + usr_entry uaccess=0 mov r2, r4 mov r3, r5 @@ -484,6 +493,8 @@ __und_usr: 1: ldrt r0, [r4] ARM_BE8(rev r0, r0) @ little endian instruction + uaccess_disable ip + @ r0 = 32-bit ARM instruction which caused the exception @ r2 = PC value for the following instruction (:= regs->ARM_pc) @ r4 = PC value for the faulting instruction @@ -518,9 +529,10 @@ __und_usr_thumb: 2: ldrht r5, [r4] ARM_BE8(rev16 r5, r5) @ little endian instruction cmp r5, #0xe800 @ 32bit instruction if xx != 0 - blo __und_usr_fault_16 @ 16bit undefined instruction + blo __und_usr_fault_16_pan @ 16bit undefined instruction 3: ldrht r0, [r2] ARM_BE8(rev16 r0, r0) @ little endian instruction + uaccess_disable ip add r2, r2, #2 @ r2 is PC + 2, make it PC + 4 str r2, [sp, #S_PC] @ it's a 2x16bit instr, update orr r0, r0, r5, lsl #16 @@ -715,6 +727,8 @@ ENDPROC(no_fp) __und_usr_fault_32: mov r1, #4 b 1f +__und_usr_fault_16_pan: + uaccess_disable ip __und_usr_fault_16: mov r1, #2 1: mov r0, sp diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 92828a1dec80..189154980703 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -173,6 +173,8 @@ ENTRY(vector_swi) USER( ldr scno, [lr, #-4] ) @ get SWI instruction #endif + uaccess_disable tbl + adr tbl, sys_call_table @ load syscall table pointer #if defined(CONFIG_OABI_COMPAT) diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index d47b5161b029..0d22ad206d52 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -215,6 +215,7 @@ blne trace_hardirqs_off #endif .endif + uaccess_restore #ifndef CONFIG_THUMB2_KERNEL @ ARM mode SVC restore @@ -258,6 +259,7 @@ @ on the stack remains correct). @ .macro svc_exit_via_fiq + uaccess_restore #ifndef CONFIG_THUMB2_KERNEL @ ARM mode restore mov r0, sp @@ -287,6 +289,7 @@ .macro restore_user_regs, fast = 0, offset = 0 + uaccess_enable r1, isb=0 #ifndef CONFIG_THUMB2_KERNEL @ ARM mode restore mov r2, sp diff --git a/arch/arm/mm/abort-ev4.S b/arch/arm/mm/abort-ev4.S index 54473cd4aba9..b3b31e30cadd 100644 --- a/arch/arm/mm/abort-ev4.S +++ b/arch/arm/mm/abort-ev4.S @@ -19,6 +19,7 @@ ENTRY(v4_early_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR ldr r3, [r4] @ read aborted ARM instruction + uaccess_disable ip @ disable userspace access bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR tst r3, #1 << 20 @ L = 1 -> write? orreq r1, r1, #1 << 11 @ yes. diff --git a/arch/arm/mm/abort-ev5t.S b/arch/arm/mm/abort-ev5t.S index c913031b79cc..a6a381a6caa5 100644 --- a/arch/arm/mm/abort-ev5t.S +++ b/arch/arm/mm/abort-ev5t.S @@ -21,6 +21,7 @@ ENTRY(v5t_early_abort) mrc p15, 0, r0, c6, c0, 0 @ get FAR do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 ldreq r3, [r4] @ read aborted ARM instruction + uaccess_disable ip @ disable user access bic r1, r1, #1 << 11 @ clear bits 11 of FSR teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? beq do_DataAbort @ yes diff --git a/arch/arm/mm/abort-ev5tj.S b/arch/arm/mm/abort-ev5tj.S index 1b80d71adb0f..00ab011bef58 100644 --- a/arch/arm/mm/abort-ev5tj.S +++ b/arch/arm/mm/abort-ev5tj.S @@ -24,6 +24,7 @@ ENTRY(v5tj_early_abort) bne do_DataAbort do_thumb_abort fsr=r1, pc=r4, psr=r5, tmp=r3 ldreq r3, [r4] @ read aborted ARM instruction + uaccess_disable ip @ disable userspace access teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? beq do_DataAbort @ yes tst r3, #1 << 20 @ L = 0 -> write diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S index 113704f30e9f..8801a15aa105 100644 --- a/arch/arm/mm/abort-ev6.S +++ b/arch/arm/mm/abort-ev6.S @@ -26,17 +26,18 @@ ENTRY(v6_early_abort) ldr ip, =0x4107b36 mrc p15, 0, r3, c0, c0, 0 @ get processor id teq ip, r3, lsr #4 @ r0 ARM1136? - bne do_DataAbort + bne 1f tst r5, #PSR_J_BIT @ Java? tsteq r5, #PSR_T_BIT @ Thumb? - bne do_DataAbort + bne 1f bic r1, r1, #1 << 11 @ clear bit 11 of FSR ldr r3, [r4] @ read aborted ARM instruction ARM_BE8(rev r3, r3) teq_ldrd tmp=ip, insn=r3 @ insn was LDRD? - beq do_DataAbort @ yes + beq 1f @ yes tst r3, #1 << 20 @ L = 0 -> write orreq r1, r1, #1 << 11 @ yes. #endif +1: uaccess_disable ip @ disable userspace access b do_DataAbort diff --git a/arch/arm/mm/abort-ev7.S b/arch/arm/mm/abort-ev7.S index 4812ad054214..e8d0e08c227f 100644 --- a/arch/arm/mm/abort-ev7.S +++ b/arch/arm/mm/abort-ev7.S @@ -15,6 +15,7 @@ ENTRY(v7_early_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR + uaccess_disable ip @ disable userspace access /* * V6 code adjusts the returned DFSR. diff --git a/arch/arm/mm/abort-lv4t.S b/arch/arm/mm/abort-lv4t.S index f3982580c273..6d8e8e3365d1 100644 --- a/arch/arm/mm/abort-lv4t.S +++ b/arch/arm/mm/abort-lv4t.S @@ -26,6 +26,7 @@ ENTRY(v4t_late_abort) #endif bne .data_thumb_abort ldr r8, [r4] @ read arm instruction + uaccess_disable ip @ disable userspace access tst r8, #1 << 20 @ L = 1 -> write? orreq r1, r1, #1 << 11 @ yes. and r7, r8, #15 << 24 @@ -155,6 +156,7 @@ ENTRY(v4t_late_abort) .data_thumb_abort: ldrh r8, [r4] @ read instruction + uaccess_disable ip @ disable userspace access tst r8, #1 << 11 @ L = 1 -> write? orreq r1, r1, #1 << 8 @ yes and r7, r8, #15 << 12 diff --git a/arch/arm/mm/abort-macro.S b/arch/arm/mm/abort-macro.S index 50d6c0a900b1..4509bee4e081 100644 --- a/arch/arm/mm/abort-macro.S +++ b/arch/arm/mm/abort-macro.S @@ -13,6 +13,7 @@ tst \psr, #PSR_T_BIT beq not_thumb ldrh \tmp, [\pc] @ Read aborted Thumb instruction + uaccess_disable ip @ disable userspace access and \tmp, \tmp, # 0xfe00 @ Mask opcode field cmp \tmp, # 0x5600 @ Is it ldrsb? orreq \tmp, \tmp, #1 << 11 @ Set L-bit if yes -- cgit From a5e090acbf545c0a3b04080f8a488b17ec41fe02 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 19 Aug 2015 20:40:41 +0100 Subject: ARM: software-based priviledged-no-access support Provide a software-based implementation of the priviledged no access support found in ARMv8.1. Userspace pages are mapped using a different domain number from the kernel and IO mappings. If we switch the user domain to "no access" when we enter the kernel, we can prevent the kernel from touching userspace. However, the kernel needs to be able to access userspace via the various user accessor functions. With the wrapping in the previous patch, we can temporarily enable access when the kernel needs user access, and re-disable it afterwards. This allows us to trap non-intended accesses to userspace, eg, caused by an inadvertent dereference of the LIST_POISON* values, which, with appropriate user mappings setup, can be made to succeed. This in turn can allow use-after-free bugs to be further exploited than would otherwise be possible. Signed-off-by: Russell King --- arch/arm/Kconfig | 15 +++++++++++++++ arch/arm/include/asm/assembler.h | 30 ++++++++++++++++++++++++++++++ arch/arm/include/asm/domain.h | 21 +++++++++++++++++++-- arch/arm/include/asm/uaccess.h | 14 ++++++++++++++ arch/arm/kernel/process.c | 36 ++++++++++++++++++++++++++++++------ arch/arm/kernel/swp_emulate.c | 3 +++ arch/arm/lib/csumpartialcopyuser.S | 14 ++++++++++++++ 7 files changed, 125 insertions(+), 8 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a750c1425c3a..e15d5ed4d5f1 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1694,6 +1694,21 @@ config HIGHPTE bool "Allocate 2nd-level pagetables from highmem" depends on HIGHMEM +config CPU_SW_DOMAIN_PAN + bool "Enable use of CPU domains to implement privileged no-access" + depends on MMU && !ARM_LPAE + default y + help + Increase kernel security by ensuring that normal kernel accesses + are unable to access userspace addresses. This can help prevent + use-after-free bugs becoming an exploitable privilege escalation + by ensuring that magic values (such as LIST_POISON) will always + fault when dereferenced. + + CPUs with low-vector mappings use a best-efforts implementation. + Their lower 1MB needs to remain accessible for the vectors, but + the remainder of userspace will become appropriately inaccessible. + config HW_PERF_EVENTS bool "Enable hardware performance counter support for perf events" depends on PERF_EVENTS diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index a91177043467..3ae0eda5e64f 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -446,15 +446,45 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) .endm .macro uaccess_disable, tmp, isb=1 +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + /* + * Whenever we re-enter userspace, the domains should always be + * set appropriately. + */ + mov \tmp, #DACR_UACCESS_DISABLE + mcr p15, 0, \tmp, c3, c0, 0 @ Set domain register + .if \isb + instr_sync + .endif +#endif .endm .macro uaccess_enable, tmp, isb=1 +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + /* + * Whenever we re-enter userspace, the domains should always be + * set appropriately. + */ + mov \tmp, #DACR_UACCESS_ENABLE + mcr p15, 0, \tmp, c3, c0, 0 + .if \isb + instr_sync + .endif +#endif .endm .macro uaccess_save, tmp +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + mrc p15, 0, \tmp, c3, c0, 0 + str \tmp, [sp, #S_FRAME_SIZE] +#endif .endm .macro uaccess_restore +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + ldr r0, [sp, #S_FRAME_SIZE] + mcr p15, 0, r0, c3, c0, 0 +#endif .endm .macro uaccess_save_and_disable, tmp diff --git a/arch/arm/include/asm/domain.h b/arch/arm/include/asm/domain.h index 2be929549938..e878129f2fee 100644 --- a/arch/arm/include/asm/domain.h +++ b/arch/arm/include/asm/domain.h @@ -57,11 +57,29 @@ #define domain_mask(dom) ((3) << (2 * (dom))) #define domain_val(dom,type) ((type) << (2 * (dom))) +#ifdef CONFIG_CPU_SW_DOMAIN_PAN +#define DACR_INIT \ + (domain_val(DOMAIN_USER, DOMAIN_NOACCESS) | \ + domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ + domain_val(DOMAIN_IO, DOMAIN_CLIENT) | \ + domain_val(DOMAIN_VECTORS, DOMAIN_CLIENT)) +#else #define DACR_INIT \ (domain_val(DOMAIN_USER, DOMAIN_CLIENT) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ domain_val(DOMAIN_IO, DOMAIN_CLIENT) | \ domain_val(DOMAIN_VECTORS, DOMAIN_CLIENT)) +#endif + +#define __DACR_DEFAULT \ + domain_val(DOMAIN_KERNEL, DOMAIN_CLIENT) | \ + domain_val(DOMAIN_IO, DOMAIN_CLIENT) | \ + domain_val(DOMAIN_VECTORS, DOMAIN_CLIENT) + +#define DACR_UACCESS_DISABLE \ + (__DACR_DEFAULT | domain_val(DOMAIN_USER, DOMAIN_NOACCESS)) +#define DACR_UACCESS_ENABLE \ + (__DACR_DEFAULT | domain_val(DOMAIN_USER, DOMAIN_CLIENT)) #ifndef __ASSEMBLY__ @@ -76,7 +94,6 @@ static inline unsigned int get_domain(void) return domain; } -#ifdef CONFIG_CPU_USE_DOMAINS static inline void set_domain(unsigned val) { asm volatile( @@ -85,6 +102,7 @@ static inline void set_domain(unsigned val) isb(); } +#ifdef CONFIG_CPU_USE_DOMAINS #define modify_domain(dom,type) \ do { \ unsigned int domain = get_domain(); \ @@ -94,7 +112,6 @@ static inline void set_domain(unsigned val) } while (0) #else -static inline void set_domain(unsigned val) { } static inline void modify_domain(unsigned dom, unsigned type) { } #endif diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 82880132f941..01bae13b2cea 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -57,11 +57,25 @@ extern int fixup_exception(struct pt_regs *regs); */ static inline unsigned int uaccess_save_and_enable(void) { +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + unsigned int old_domain = get_domain(); + + /* Set the current domain access to permit user accesses */ + set_domain((old_domain & ~domain_mask(DOMAIN_USER)) | + domain_val(DOMAIN_USER, DOMAIN_CLIENT)); + + return old_domain; +#else return 0; +#endif } static inline void uaccess_restore(unsigned int flags) { +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + /* Restore the user access mask */ + set_domain(flags); +#endif } /* diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index e722f9b3c9b1..3f18098dfd08 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -129,12 +129,36 @@ void __show_regs(struct pt_regs *regs) buf[4] = '\0'; #ifndef CONFIG_CPU_V7M - printk("Flags: %s IRQs o%s FIQs o%s Mode %s ISA %s Segment %s\n", - buf, interrupts_enabled(regs) ? "n" : "ff", - fast_interrupts_enabled(regs) ? "n" : "ff", - processor_modes[processor_mode(regs)], - isa_modes[isa_mode(regs)], - get_fs() == get_ds() ? "kernel" : "user"); + { + unsigned int domain = get_domain(); + const char *segment; + +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + /* + * Get the domain register for the parent context. In user + * mode, we don't save the DACR, so lets use what it should + * be. For other modes, we place it after the pt_regs struct. + */ + if (user_mode(regs)) + domain = DACR_UACCESS_ENABLE; + else + domain = *(unsigned int *)(regs + 1); +#endif + + if ((domain & domain_mask(DOMAIN_USER)) == + domain_val(DOMAIN_USER, DOMAIN_NOACCESS)) + segment = "none"; + else if (get_fs() == get_ds()) + segment = "kernel"; + else + segment = "user"; + + printk("Flags: %s IRQs o%s FIQs o%s Mode %s ISA %s Segment %s\n", + buf, interrupts_enabled(regs) ? "n" : "ff", + fast_interrupts_enabled(regs) ? "n" : "ff", + processor_modes[processor_mode(regs)], + isa_modes[isa_mode(regs)], segment); + } #else printk("xPSR: %08lx\n", regs->ARM_cpsr); #endif diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index 1361756782c7..5b26e7efa9ea 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -141,11 +141,14 @@ static int emulate_swpX(unsigned int address, unsigned int *data, while (1) { unsigned long temp; + unsigned int __ua_flags; + __ua_flags = uaccess_save_and_enable(); if (type == TYPE_SWPB) __user_swpb_asm(*data, address, res, temp); else __user_swp_asm(*data, address, res, temp); + uaccess_restore(__ua_flags); if (likely(res != -EAGAIN) || signal_pending(current)) break; diff --git a/arch/arm/lib/csumpartialcopyuser.S b/arch/arm/lib/csumpartialcopyuser.S index 1d0957e61f89..1712f132b80d 100644 --- a/arch/arm/lib/csumpartialcopyuser.S +++ b/arch/arm/lib/csumpartialcopyuser.S @@ -17,6 +17,19 @@ .text +#ifdef CONFIG_CPU_SW_DOMAIN_PAN + .macro save_regs + mrc p15, 0, ip, c3, c0, 0 + stmfd sp!, {r1, r2, r4 - r8, ip, lr} + uaccess_enable ip + .endm + + .macro load_regs + ldmfd sp!, {r1, r2, r4 - r8, ip, lr} + mcr p15, 0, ip, c3, c0, 0 + ret lr + .endm +#else .macro save_regs stmfd sp!, {r1, r2, r4 - r8, lr} .endm @@ -24,6 +37,7 @@ .macro load_regs ldmfd sp!, {r1, r2, r4 - r8, pc} .endm +#endif .macro load1b, reg1 ldrusr \reg1, r0, 1 -- cgit